luau/CLI/Reduce.cpp
Andy Friesen d2ab5df62b
Sync to upstream/release/565 (#845)
We've made a few small changes to reduce the amount of stack we use when
typechecking nested method calls (eg `foo:bar():baz():quux()`).

We've also fixed a small bytecode compiler issue that caused us to emit
redundant jump instructions in code that conditionally uses `break` or
`continue`.

On the new solver, we've switched to a new, better way to handle
augmentations to unsealed tables. We've also made some substantial
improvements to type inference and error reporting on function calls.
These things should both be on par with the old solver now.

The main improvements to the native code generator have been elimination
of some redundant type tag checks. Also, we are starting to inline
particular fastcalls directly to IR.

---------

Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-02-24 13:49:38 -08:00

512 lines
14 KiB
C++

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/Ast.h"
#include "Luau/Common.h"
#include "Luau/Parser.h"
#include "Luau/Transpiler.h"
#include "FileUtils.h"
#include <algorithm>
#include <stdio.h>
#include <string>
#include <string_view>
#include <queue>
#define VERBOSE 0 // 1 - print out commandline invocations. 2 - print out stdout
#ifdef _WIN32
const auto popen = &_popen;
const auto pclose = &_pclose;
#endif
using namespace Luau;
enum class TestResult
{
BugFound, // We encountered the bug we are trying to isolate
NoBug, // We did not encounter the bug we are trying to isolate
};
struct Enqueuer : public AstVisitor
{
std::queue<AstStatBlock*>* queue;
explicit Enqueuer(std::queue<AstStatBlock*>* queue)
: queue(queue)
{
LUAU_ASSERT(queue);
}
bool visit(AstStatBlock* block) override
{
queue->push(block);
return false;
}
};
struct Reducer
{
Allocator allocator;
AstNameTable nameTable{allocator};
ParseOptions parseOptions;
ParseResult parseResult;
AstStatBlock* root;
std::string tempScriptName;
std::string appName;
std::vector<std::string> appArgs;
std::string_view searchText;
Reducer()
{
parseOptions.captureComments = true;
}
std::string readLine(FILE* f)
{
std::string line = "";
char buffer[256];
while (fgets(buffer, sizeof(buffer), f))
{
auto len = strlen(buffer);
line += std::string(buffer, len);
if (buffer[len - 1] == '\n')
break;
}
return line;
}
void writeTempScript(bool minify = false)
{
std::string source = transpileWithTypes(*root);
if (minify)
{
size_t pos = 0;
do
{
pos = source.find("\n\n", pos);
if (pos == std::string::npos)
break;
source.erase(pos, 1);
} while (true);
}
FILE* f = fopen(tempScriptName.c_str(), "w");
if (!f)
{
printf("Unable to open temp script to %s\n", tempScriptName.c_str());
exit(2);
}
for (const HotComment& comment : parseResult.hotcomments)
fprintf(f, "--!%s\n", comment.content.c_str());
auto written = fwrite(source.data(), 1, source.size(), f);
if (written != source.size())
{
printf("??? %zu %zu\n", written, source.size());
printf("Unable to write to temp script %s\n", tempScriptName.c_str());
exit(3);
}
fclose(f);
}
int step = 0;
std::string escape(const std::string& s)
{
std::string result;
result.reserve(s.size() + 20); // guess
result += '"';
for (char c : s)
{
if (c == '"')
result += '\\';
result += c;
}
result += '"';
return result;
}
TestResult run()
{
writeTempScript();
std::string command = appName + " " + escape(tempScriptName);
for (const auto& arg : appArgs)
command += " " + escape(arg);
#if VERBOSE >= 1
printf("running %s\n", command.c_str());
#endif
TestResult result = TestResult::NoBug;
++step;
printf("Step %4d...\n", step);
FILE* p = popen(command.c_str(), "r");
while (!feof(p))
{
std::string s = readLine(p);
#if VERBOSE >= 2
printf("%s", s.c_str());
#endif
if (std::string::npos != s.find(searchText))
{
result = TestResult::BugFound;
break;
}
}
pclose(p);
return result;
}
std::vector<AstStat*> getNestedStats(AstStat* stat)
{
std::vector<AstStat*> result;
auto append = [&](AstStatBlock* block) {
if (block)
result.insert(result.end(), block->body.data, block->body.data + block->body.size);
};
if (auto block = stat->as<AstStatBlock>())
append(block);
else if (auto ifs = stat->as<AstStatIf>())
{
append(ifs->thenbody);
if (ifs->elsebody)
{
if (AstStatBlock* elseBlock = ifs->elsebody->as<AstStatBlock>())
append(elseBlock);
else if (AstStatIf* elseIf = ifs->elsebody->as<AstStatIf>())
{
auto innerStats = getNestedStats(elseIf);
result.insert(end(result), begin(innerStats), end(innerStats));
}
else
{
printf("AstStatIf's else clause can have more statement types than I thought\n");
LUAU_ASSERT(0);
}
}
}
else if (auto w = stat->as<AstStatWhile>())
append(w->body);
else if (auto r = stat->as<AstStatRepeat>())
append(r->body);
else if (auto f = stat->as<AstStatFor>())
append(f->body);
else if (auto f = stat->as<AstStatForIn>())
append(f->body);
else if (auto f = stat->as<AstStatFunction>())
append(f->func->body);
else if (auto f = stat->as<AstStatLocalFunction>())
append(f->func->body);
return result;
}
// Move new body data into allocator-managed storage so that it's safe to keep around longterm.
AstStat** reallocateStatements(const std::vector<AstStat*>& statements)
{
AstStat** newData = static_cast<AstStat**>(allocator.allocate(sizeof(AstStat*) * statements.size()));
std::copy(statements.data(), statements.data() + statements.size(), newData);
return newData;
}
// Semiopen interval
using Span = std::pair<size_t, size_t>;
// Generates 'chunks' semiopen spans of equal-ish size to span the indeces running from 0 to 'size'
// Also inverses.
std::vector<std::pair<Span, Span>> generateSpans(size_t size, size_t chunks)
{
if (size <= 1)
return {};
LUAU_ASSERT(chunks > 0);
size_t chunkLength = std::max<size_t>(1, size / chunks);
std::vector<std::pair<Span, Span>> result;
auto append = [&result](Span a, Span b) {
if (a.first == a.second && b.first == b.second)
return;
else
result.emplace_back(a, b);
};
size_t i = 0;
while (i < size)
{
size_t end = std::min(i + chunkLength, size);
append(Span{0, i}, Span{end, size});
i = end;
}
i = 0;
while (i < size)
{
size_t end = std::min(i + chunkLength, size);
append(Span{i, end}, Span{size, size});
i = end;
}
return result;
}
// Returns the statements of block within span1 and span2
// Also has the hokey restriction that span1 must come before span2
std::vector<AstStat*> prunedSpan(AstStatBlock* block, Span span1, Span span2)
{
std::vector<AstStat*> result;
for (size_t i = span1.first; i < span1.second; ++i)
result.push_back(block->body.data[i]);
for (size_t i = span2.first; i < span2.second; ++i)
result.push_back(block->body.data[i]);
return result;
}
// returns true if anything was culled plus the chunk count
std::pair<bool, size_t> deleteChildStatements(AstStatBlock* block, size_t chunkCount)
{
if (block->body.size == 0)
return {false, chunkCount};
do
{
auto permutations = generateSpans(block->body.size, chunkCount);
for (const auto& [span1, span2] : permutations)
{
auto tempStatements = prunedSpan(block, span1, span2);
AstArray<AstStat*> backupBody{tempStatements.data(), tempStatements.size()};
std::swap(block->body, backupBody);
TestResult result = run();
if (result == TestResult::BugFound)
{
// The bug still reproduces without the statements we've culled. Commit.
block->body.data = reallocateStatements(tempStatements);
return {true, std::max<size_t>(2, chunkCount - 1)};
}
else
{
// The statements we've culled are critical for the reproduction of the bug.
// TODO try promoting its contents into this scope
std::swap(block->body, backupBody);
}
}
chunkCount *= 2;
} while (chunkCount <= block->body.size);
return {false, block->body.size};
}
bool deleteChildStatements(AstStatBlock* b)
{
bool result = false;
size_t chunkCount = 2;
while (true)
{
auto [workDone, newChunkCount] = deleteChildStatements(b, chunkCount);
if (workDone)
{
result = true;
chunkCount = newChunkCount;
continue;
}
else
break;
}
return result;
}
bool tryPromotingChildStatements(AstStatBlock* b, size_t index)
{
std::vector<AstStat*> tempStats(b->body.data, b->body.data + b->body.size);
AstStat* removed = tempStats.at(index);
tempStats.erase(begin(tempStats) + index);
std::vector<AstStat*> nestedStats = getNestedStats(removed);
tempStats.insert(begin(tempStats) + index, begin(nestedStats), end(nestedStats));
AstArray<AstStat*> tempArray{tempStats.data(), tempStats.size()};
std::swap(b->body, tempArray);
TestResult result = run();
if (result == TestResult::BugFound)
{
b->body.data = reallocateStatements(tempStats);
return true;
}
else
{
std::swap(b->body, tempArray);
return false;
}
}
// We live with some weirdness because I'm kind of lazy: If a statement's
// contents are promoted, we try promoting those prometed statements right
// away. I don't think it matters: If we can delete a statement and still
// exhibit the bug, we should do so. The order isn't so important.
bool tryPromotingChildStatements(AstStatBlock* b)
{
size_t i = 0;
while (i < b->body.size)
{
bool promoted = tryPromotingChildStatements(b, i);
if (!promoted)
++i;
}
return false;
}
void walk(AstStatBlock* block)
{
std::queue<AstStatBlock*> queue;
Enqueuer enqueuer{&queue};
queue.push(block);
while (!queue.empty())
{
AstStatBlock* b = queue.front();
queue.pop();
bool result = false;
do
{
result = deleteChildStatements(b);
/* Try other reductions here before we walk into child statements
* Other reductions to try someday:
*
* Promoting a statement's children to the enclosing block.
* Deleting type annotations
* Deleting parts of type annotations
* Replacing subexpressions with ({} :: any)
* Inlining type aliases
* Inlining constants
* Inlining functions
*/
result |= tryPromotingChildStatements(b);
} while (result);
for (AstStat* stat : b->body)
stat->visit(&enqueuer);
}
}
void run(const std::string scriptName, const std::string appName, const std::vector<std::string>& appArgs, std::string_view source,
std::string_view searchText)
{
tempScriptName = scriptName;
if (tempScriptName.substr(tempScriptName.size() - 4) == ".lua")
{
tempScriptName.erase(tempScriptName.size() - 4);
tempScriptName += "-reduced.lua";
}
else
{
this->tempScriptName = scriptName + "-reduced";
}
#if 0
// Handy debugging trick: VS Code will update its view of the file in realtime as it is edited.
std::string wheee = "code " + tempScriptName;
system(wheee.c_str());
#endif
printf("Temp script: %s\n", tempScriptName.c_str());
this->appName = appName;
this->appArgs = appArgs;
this->searchText = searchText;
parseResult = Parser::parse(source.data(), source.size(), nameTable, allocator, parseOptions);
if (!parseResult.errors.empty())
{
printf("Parse errors\n");
exit(1);
}
root = parseResult.root;
const TestResult initialResult = run();
if (initialResult == TestResult::NoBug)
{
printf("Could not find failure string in the unmodified script! Check your commandline arguments\n");
exit(2);
}
walk(root);
writeTempScript(/* minify */ true);
printf("Done! Check %s\n", tempScriptName.c_str());
}
};
[[noreturn]] void help(const std::vector<std::string_view>& args)
{
printf("Syntax: %s script application \"search text\" [arguments]\n", args[0].data());
exit(1);
}
int main(int argc, char** argv)
{
const std::vector<std::string_view> args(argv, argv + argc);
if (args.size() < 4)
help(args);
for (size_t i = 1; i < args.size(); ++i)
{
if (args[i] == "--help")
help(args);
}
const std::string scriptName = argv[1];
const std::string appName = argv[2];
const std::string searchText = argv[3];
const std::vector<std::string> appArgs(begin(args) + 4, end(args));
std::optional<std::string> source = readFile(scriptName);
if (!source)
{
printf("Could not read source %s\n", argv[1]);
exit(1);
}
Reducer reducer;
reducer.run(scriptName, appName, appArgs, *source, searchText);
}