diff --git a/Analysis/include/Luau/ConstraintSolver.h b/Analysis/include/Luau/ConstraintSolver.h index e9e1e884..2feee236 100644 --- a/Analysis/include/Luau/ConstraintSolver.h +++ b/Analysis/include/Luau/ConstraintSolver.h @@ -53,7 +53,6 @@ struct ConstraintSolver NotNull builtinTypes; InternalErrorReporter iceReporter; NotNull normalizer; - NotNull reducer; // The entire set of constraints that the solver is trying to resolve. std::vector> constraints; NotNull rootScope; @@ -85,8 +84,7 @@ struct ConstraintSolver DcrLogger* logger; explicit ConstraintSolver(NotNull normalizer, NotNull rootScope, std::vector> constraints, - ModuleName moduleName, NotNull reducer, NotNull moduleResolver, std::vector requireCycles, - DcrLogger* logger); + ModuleName moduleName, NotNull moduleResolver, std::vector requireCycles, DcrLogger* logger); // Randomize the order in which to dispatch constraints void randomize(unsigned seed); @@ -219,6 +217,20 @@ struct ConstraintSolver void reportError(TypeError e); private: + + /** Helper used by tryDispatch(SubtypeConstraint) and + * tryDispatch(PackSubtypeConstraint) + * + * Attempts to unify subTy with superTy. If doing so would require unifying + * BlockedTypes, fail and block the constraint on those BlockedTypes. + * + * If unification fails, replace all free types with errorType. + * + * If unification succeeds, unblock every type changed by the unification. + */ + template + bool tryUnify(NotNull constraint, TID subTy, TID superTy); + /** * Marks a constraint as being blocked on a type or type pack. The constraint * solver will not attempt to dispatch blocked constraints until their diff --git a/Analysis/include/Luau/Normalize.h b/Analysis/include/Luau/Normalize.h index 15404707..efcb5108 100644 --- a/Analysis/include/Luau/Normalize.h +++ b/Analysis/include/Luau/Normalize.h @@ -191,12 +191,8 @@ struct NormalizedClassType // this type may contain `error`. struct NormalizedFunctionType { - NormalizedFunctionType(); - bool isTop = false; - // TODO: Remove this wrapping optional when clipping - // FFlagLuauNegatedFunctionTypes. - std::optional parts; + TypeIds parts; void resetToNever(); void resetToTop(); diff --git a/Analysis/include/Luau/Scope.h b/Analysis/include/Luau/Scope.h index 745ea47a..c3038fac 100644 --- a/Analysis/include/Luau/Scope.h +++ b/Analysis/include/Luau/Scope.h @@ -55,11 +55,11 @@ struct Scope std::optional lookup(DefId def) const; std::optional> lookupEx(Symbol sym); - std::optional lookupType(const Name& name); - std::optional lookupImportedType(const Name& moduleAlias, const Name& name); + std::optional lookupType(const Name& name) const; + std::optional lookupImportedType(const Name& moduleAlias, const Name& name) const; std::unordered_map privateTypePackBindings; - std::optional lookupPack(const Name& name); + std::optional lookupPack(const Name& name) const; // WARNING: This function linearly scans for a string key of equal value! It is thus O(n**2) std::optional linearSearchForBinding(const std::string& name, bool traverseScopeChain = true) const; diff --git a/Analysis/include/Luau/TypeInfer.h b/Analysis/include/Luau/TypeInfer.h index 68161794..7dae79c3 100644 --- a/Analysis/include/Luau/TypeInfer.h +++ b/Analysis/include/Luau/TypeInfer.h @@ -79,7 +79,8 @@ struct GlobalTypes // within a program are borrowed pointers into this set. 
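+ // A sketch of the revised construction (mirroring the Frontend.cpp hunk
+ // later in this patch): TypeChecker now borrows only the global scope
+ // rather than the whole GlobalTypes struct.
+ //
+ //   TypeChecker typeChecker(globals.globalScope, &moduleResolver, builtinTypes, &iceHandler);
+ //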
struct TypeChecker { - explicit TypeChecker(const GlobalTypes& globals, ModuleResolver* resolver, NotNull builtinTypes, InternalErrorReporter* iceHandler); + explicit TypeChecker( + const ScopePtr& globalScope, ModuleResolver* resolver, NotNull builtinTypes, InternalErrorReporter* iceHandler); TypeChecker(const TypeChecker&) = delete; TypeChecker& operator=(const TypeChecker&) = delete; @@ -367,8 +368,7 @@ public: */ std::vector unTypePack(const ScopePtr& scope, TypePackId pack, size_t expectedLength, const Location& location); - // TODO: only const version of global scope should be available to make sure nothing else is modified inside of from users of TypeChecker - const GlobalTypes& globals; + const ScopePtr& globalScope; ModuleResolver* resolver; ModulePtr currentModule; diff --git a/Analysis/src/AstQuery.cpp b/Analysis/src/AstQuery.cpp index b0c3750b..dc07a35c 100644 --- a/Analysis/src/AstQuery.cpp +++ b/Analysis/src/AstQuery.cpp @@ -11,8 +11,6 @@ #include -LUAU_FASTFLAG(LuauCompleteTableKeysBetter); - namespace Luau { @@ -31,24 +29,12 @@ struct AutocompleteNodeFinder : public AstVisitor bool visit(AstExpr* expr) override { - if (FFlag::LuauCompleteTableKeysBetter) + if (expr->location.begin <= pos && pos <= expr->location.end) { - if (expr->location.begin <= pos && pos <= expr->location.end) - { - ancestry.push_back(expr); - return true; - } - return false; - } - else - { - if (expr->location.begin < pos && pos <= expr->location.end) - { - ancestry.push_back(expr); - return true; - } - return false; + ancestry.push_back(expr); + return true; } + return false; } bool visit(AstStat* stat) override diff --git a/Analysis/src/Autocomplete.cpp b/Analysis/src/Autocomplete.cpp index 1df4d3d7..3fdd9319 100644 --- a/Analysis/src/Autocomplete.cpp +++ b/Analysis/src/Autocomplete.cpp @@ -13,7 +13,6 @@ #include #include -LUAU_FASTFLAGVARIABLE(LuauCompleteTableKeysBetter, false); LUAU_FASTFLAGVARIABLE(LuauAutocompleteSkipNormalization, false); static const std::unordered_set kStatementStartingKeywords = { @@ -981,25 +980,14 @@ T* extractStat(const std::vector& ancestry) AstNode* grandParent = ancestry.size() >= 3 ? ancestry.rbegin()[2] : nullptr; AstNode* greatGrandParent = ancestry.size() >= 4 ? ancestry.rbegin()[3] : nullptr; - if (FFlag::LuauCompleteTableKeysBetter) - { - if (!grandParent) - return nullptr; + if (!grandParent) + return nullptr; - if (T* t = parent->as(); t && grandParent->is()) - return t; + if (T* t = parent->as(); t && grandParent->is()) + return t; - if (!greatGrandParent) - return nullptr; - } - else - { - if (T* t = parent->as(); t && parent->is()) - return t; - - if (!grandParent || !greatGrandParent) - return nullptr; - } + if (!greatGrandParent) + return nullptr; if (T* t = greatGrandParent->as(); t && grandParent->is() && parent->is() && isIdentifier(node)) return t; @@ -1533,23 +1521,20 @@ static AutocompleteResult autocomplete(const SourceModule& sourceModule, const M { auto result = autocompleteProps(*module, typeArena, builtinTypes, *it, PropIndexType::Key, ancestry); - if (FFlag::LuauCompleteTableKeysBetter) - { - if (auto nodeIt = module->astExpectedTypes.find(node->asExpr())) - autocompleteStringSingleton(*nodeIt, !node->is(), result); + if (auto nodeIt = module->astExpectedTypes.find(node->asExpr())) + autocompleteStringSingleton(*nodeIt, !node->is(), result); - if (!key) + if (!key) + { + // If there is "no key," it may be that the user + // intends for the current token to be the key, but + // has yet to type the `=` sign. 
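+ //
+ // For example (illustrative), in `local m: SomeMap = { u }` the
+ // token `u` currently parses as a value, but the user most likely
+ // means it as a key and simply has not typed the `=` yet.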
+ // + // If the key type is a union of singleton strings, + // suggest those too. + if (auto ttv = get(follow(*it)); ttv && ttv->indexer) { - // If there is "no key," it may be that the user - // intends for the current token to be the key, but - // has yet to type the `=` sign. - // - // If the key type is a union of singleton strings, - // suggest those too. - if (auto ttv = get(follow(*it)); ttv && ttv->indexer) - { - autocompleteStringSingleton(ttv->indexer->indexType, false, result); - } + autocompleteStringSingleton(ttv->indexer->indexType, false, result); } } diff --git a/Analysis/src/BuiltinDefinitions.cpp b/Analysis/src/BuiltinDefinitions.cpp index d2ace49b..2108b160 100644 --- a/Analysis/src/BuiltinDefinitions.cpp +++ b/Analysis/src/BuiltinDefinitions.cpp @@ -15,8 +15,6 @@ #include -LUAU_FASTFLAGVARIABLE(LuauDeprecateTableGetnForeach, false) - /** FIXME: Many of these type definitions are not quite completely accurate. * * Some of them require richer generics than we have. For instance, we do not yet have a way to talk @@ -298,13 +296,10 @@ void registerBuiltinGlobals(TypeChecker& typeChecker, GlobalTypes& globals) ttv->props["freeze"] = makeProperty(makeFunction(arena, std::nullopt, {tabTy}, {tabTy}), "@luau/global/table.freeze"); ttv->props["clone"] = makeProperty(makeFunction(arena, std::nullopt, {tabTy}, {tabTy}), "@luau/global/table.clone"); - if (FFlag::LuauDeprecateTableGetnForeach) - { - ttv->props["getn"].deprecated = true; - ttv->props["getn"].deprecatedSuggestion = "#"; - ttv->props["foreach"].deprecated = true; - ttv->props["foreachi"].deprecated = true; - } + ttv->props["getn"].deprecated = true; + ttv->props["getn"].deprecatedSuggestion = "#"; + ttv->props["foreach"].deprecated = true; + ttv->props["foreachi"].deprecated = true; attachMagicFunction(ttv->props["pack"].type, magicFunctionPack); attachDcrMagicFunction(ttv->props["pack"].type, dcrMagicFunctionPack); @@ -401,15 +396,13 @@ void registerBuiltinGlobals(Frontend& frontend) ttv->props["freeze"] = makeProperty(makeFunction(arena, std::nullopt, {tabTy}, {tabTy}), "@luau/global/table.freeze"); ttv->props["clone"] = makeProperty(makeFunction(arena, std::nullopt, {tabTy}, {tabTy}), "@luau/global/table.clone"); - if (FFlag::LuauDeprecateTableGetnForeach) - { - ttv->props["getn"].deprecated = true; - ttv->props["getn"].deprecatedSuggestion = "#"; - ttv->props["foreach"].deprecated = true; - ttv->props["foreachi"].deprecated = true; - } + ttv->props["getn"].deprecated = true; + ttv->props["getn"].deprecatedSuggestion = "#"; + ttv->props["foreach"].deprecated = true; + ttv->props["foreachi"].deprecated = true; attachMagicFunction(ttv->props["pack"].type, magicFunctionPack); + attachDcrMagicFunction(ttv->props["pack"].type, dcrMagicFunctionPack); } attachMagicFunction(getGlobalBinding(globals, "require"), magicFunctionRequire); diff --git a/Analysis/src/ConstraintSolver.cpp b/Analysis/src/ConstraintSolver.cpp index d5853932..d2bed2da 100644 --- a/Analysis/src/ConstraintSolver.cpp +++ b/Analysis/src/ConstraintSolver.cpp @@ -226,12 +226,10 @@ void dump(ConstraintSolver* cs, ToStringOptions& opts) } ConstraintSolver::ConstraintSolver(NotNull normalizer, NotNull rootScope, std::vector> constraints, - ModuleName moduleName, NotNull reducer, NotNull moduleResolver, std::vector requireCycles, - DcrLogger* logger) + ModuleName moduleName, NotNull moduleResolver, std::vector requireCycles, DcrLogger* logger) : arena(normalizer->arena) , builtinTypes(normalizer->builtinTypes) , normalizer(normalizer) - , reducer(reducer) , 
constraints(std::move(constraints)) , rootScope(rootScope) , currentModuleName(std::move(moduleName)) @@ -458,40 +456,7 @@ bool ConstraintSolver::tryDispatch(const SubtypeConstraint& c, NotNullscope, Location{}, Covariant}; - u.useScopes = true; - - u.tryUnify(c.subType, c.superType); - - if (!u.blockedTypes.empty() || !u.blockedTypePacks.empty()) - { - for (TypeId bt : u.blockedTypes) - block(bt, constraint); - for (TypePackId btp : u.blockedTypePacks) - block(btp, constraint); - return false; - } - - if (const auto& e = hasUnificationTooComplex(u.errors)) - reportError(*e); - - if (!u.errors.empty()) - { - TypeId errorType = errorRecoveryType(); - u.tryUnify(c.subType, errorType); - u.tryUnify(c.superType, errorType); - } - - const auto [changedTypes, changedPacks] = u.log.getChanges(); - - u.log.commit(); - - unblock(changedTypes); - unblock(changedPacks); - - // unify(c.subType, c.superType, constraint->scope); - - return true; + return tryUnify(constraint, c.subType, c.superType); } bool ConstraintSolver::tryDispatch(const PackSubtypeConstraint& c, NotNull constraint, bool force) @@ -501,9 +466,7 @@ bool ConstraintSolver::tryDispatch(const PackSubtypeConstraint& c, NotNullscope); - - return true; + return tryUnify(constraint, c.subPack, c.superPack); } bool ConstraintSolver::tryDispatch(const GeneralizationConstraint& c, NotNull constraint, bool force) @@ -1117,7 +1080,7 @@ bool ConstraintSolver::tryDispatch(const TypeAliasExpansionConstraint& c, NotNul InstantiationQueuer queuer{constraint->scope, constraint->location, this}; queuer.traverse(target); - if (target->persistent) + if (target->persistent || target->owningArena != arena) { bindResult(target); return true; @@ -1335,8 +1298,6 @@ bool ConstraintSolver::tryDispatch(const HasPropConstraint& c, NotNullreduce(subjectType).value_or(subjectType); - auto [blocked, result] = lookupTableProp(subjectType, c.prop); if (!blocked.empty()) { @@ -1716,8 +1677,15 @@ bool ConstraintSolver::tryDispatchIterableTable(TypeId iteratorTy, const Iterabl if (auto iteratorTable = get(iteratorTy)) { - if (iteratorTable->state == TableState::Free) - return block_(iteratorTy); + /* + * We try not to dispatch IterableConstraints over free tables because + * it's possible that there are other constraints on the table that will + * clarify what we should do. + * + * We should eventually introduce a type family to talk about iteration. 
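+ *
+ * For example, if the value being iterated is an unannotated function
+ * parameter, it is still a free table when we get here, and a later
+ * constraint (say, a `t[1] = x` elsewhere in the function) may yet give
+ * it an indexer that tells us how to iterate it. So unless dispatch is
+ * forced, we block on the table rather than guess.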
+ */ + if (iteratorTable->state == TableState::Free && !force) + return block(iteratorTy, constraint); if (iteratorTable->indexer) { @@ -1957,14 +1925,14 @@ std::pair, std::optional> ConstraintSolver::lookupTa else if (auto utv = get(subjectType)) { std::vector blocked; - std::vector options; + std::set options; for (TypeId ty : utv) { auto [innerBlocked, innerResult] = lookupTableProp(ty, propName, seen); blocked.insert(blocked.end(), innerBlocked.begin(), innerBlocked.end()); if (innerResult) - options.push_back(*innerResult); + options.insert(*innerResult); } if (!blocked.empty()) @@ -1973,21 +1941,21 @@ std::pair, std::optional> ConstraintSolver::lookupTa if (options.empty()) return {{}, std::nullopt}; else if (options.size() == 1) - return {{}, options[0]}; + return {{}, *begin(options)}; else - return {{}, arena->addType(UnionType{std::move(options)})}; + return {{}, arena->addType(UnionType{std::vector(begin(options), end(options))})}; } else if (auto itv = get(subjectType)) { std::vector blocked; - std::vector options; + std::set options; for (TypeId ty : itv) { auto [innerBlocked, innerResult] = lookupTableProp(ty, propName, seen); blocked.insert(blocked.end(), innerBlocked.begin(), innerBlocked.end()); if (innerResult) - options.push_back(*innerResult); + options.insert(*innerResult); } if (!blocked.empty()) @@ -1996,14 +1964,61 @@ std::pair, std::optional> ConstraintSolver::lookupTa if (options.empty()) return {{}, std::nullopt}; else if (options.size() == 1) - return {{}, options[0]}; + return {{}, *begin(options)}; else - return {{}, arena->addType(IntersectionType{std::move(options)})}; + return {{}, arena->addType(IntersectionType{std::vector(begin(options), end(options))})}; } return {{}, std::nullopt}; } +static TypeId getErrorType(NotNull builtinTypes, TypeId) +{ + return builtinTypes->errorRecoveryType(); +} + +static TypePackId getErrorType(NotNull builtinTypes, TypePackId) +{ + return builtinTypes->errorRecoveryTypePack(); +} + +template +bool ConstraintSolver::tryUnify(NotNull constraint, TID subTy, TID superTy) +{ + Unifier u{normalizer, Mode::Strict, constraint->scope, Location{}, Covariant}; + u.useScopes = true; + + u.tryUnify(subTy, superTy); + + if (!u.blockedTypes.empty() || !u.blockedTypePacks.empty()) + { + for (TypeId bt : u.blockedTypes) + block(bt, constraint); + for (TypePackId btp : u.blockedTypePacks) + block(btp, constraint); + return false; + } + + if (const auto& e = hasUnificationTooComplex(u.errors)) + reportError(*e); + + if (!u.errors.empty()) + { + TID errorType = getErrorType(builtinTypes, TID{}); + u.tryUnify(subTy, errorType); + u.tryUnify(superTy, errorType); + } + + const auto [changedTypes, changedPacks] = u.log.getChanges(); + + u.log.commit(); + + unblock(changedTypes); + unblock(changedPacks); + + return true; +} + void ConstraintSolver::block_(BlockedConstraintId target, NotNull constraint) { blocked[target].push_back(constraint); diff --git a/Analysis/src/Frontend.cpp b/Analysis/src/Frontend.cpp index a50933b7..191e94f4 100644 --- a/Analysis/src/Frontend.cpp +++ b/Analysis/src/Frontend.cpp @@ -435,8 +435,8 @@ Frontend::Frontend(FileResolver* fileResolver, ConfigResolver* configResolver, c , moduleResolverForAutocomplete(this) , globals(builtinTypes) , globalsForAutocomplete(builtinTypes) - , typeChecker(globals, &moduleResolver, builtinTypes, &iceHandler) - , typeCheckerForAutocomplete(globalsForAutocomplete, &moduleResolverForAutocomplete, builtinTypes, &iceHandler) + , typeChecker(globals.globalScope, &moduleResolver, builtinTypes, 
&iceHandler) + , typeCheckerForAutocomplete(globalsForAutocomplete.globalScope, &moduleResolverForAutocomplete, builtinTypes, &iceHandler) , configResolver(configResolver) , options(options) { @@ -970,8 +970,8 @@ ModulePtr check(const SourceModule& sourceModule, const std::vectorerrors = std::move(cgb.errors); - ConstraintSolver cs{NotNull{&normalizer}, NotNull(cgb.rootScope), borrowConstraints(cgb.constraints), sourceModule.name, - NotNull{result->reduction.get()}, moduleResolver, requireCycles, logger.get()}; + ConstraintSolver cs{NotNull{&normalizer}, NotNull(cgb.rootScope), borrowConstraints(cgb.constraints), sourceModule.name, moduleResolver, + requireCycles, logger.get()}; if (options.randomizeConstraintResolutionSeed) cs.randomize(*options.randomizeConstraintResolutionSeed); diff --git a/Analysis/src/Linter.cpp b/Analysis/src/Linter.cpp index f850bd3d..d6aafda6 100644 --- a/Analysis/src/Linter.cpp +++ b/Analysis/src/Linter.cpp @@ -14,8 +14,6 @@ LUAU_FASTINTVARIABLE(LuauSuggestionDistance, 4) -LUAU_FASTFLAGVARIABLE(LuauImproveDeprecatedApiLint, false) - namespace Luau { @@ -2102,9 +2100,6 @@ class LintDeprecatedApi : AstVisitor public: LUAU_NOINLINE static void process(LintContext& context) { - if (!FFlag::LuauImproveDeprecatedApiLint && !context.module) - return; - LintDeprecatedApi pass{&context}; context.root->visit(&pass); } @@ -2122,8 +2117,7 @@ private: if (std::optional ty = context->getType(node->expr)) check(node, follow(*ty)); else if (AstExprGlobal* global = node->expr->as()) - if (FFlag::LuauImproveDeprecatedApiLint) - check(node->location, global->name, node->index); + check(node->location, global->name, node->index); return true; } @@ -2144,7 +2138,7 @@ private: if (prop != tty->props.end() && prop->second.deprecated) { // strip synthetic typeof() for builtin tables - if (FFlag::LuauImproveDeprecatedApiLint && tty->name && tty->name->compare(0, 7, "typeof(") == 0 && tty->name->back() == ')') + if (tty->name && tty->name->compare(0, 7, "typeof(") == 0 && tty->name->back() == ')') report(node->location, prop->second, tty->name->substr(7, tty->name->length() - 8).c_str(), node->index.value); else report(node->location, prop->second, tty->name ? tty->name->c_str() : nullptr, node->index.value); diff --git a/Analysis/src/Normalize.cpp b/Analysis/src/Normalize.cpp index f383f5ea..7c56a4b8 100644 --- a/Analysis/src/Normalize.cpp +++ b/Analysis/src/Normalize.cpp @@ -18,7 +18,6 @@ LUAU_FASTFLAGVARIABLE(DebugLuauCheckNormalizeInvariant, false) LUAU_FASTINTVARIABLE(LuauNormalizeIterationLimit, 1200); LUAU_FASTINTVARIABLE(LuauNormalizeCacheLimit, 100000); LUAU_FASTFLAGVARIABLE(LuauNegatedClassTypes, false); -LUAU_FASTFLAGVARIABLE(LuauNegatedFunctionTypes, false); LUAU_FASTFLAGVARIABLE(LuauNegatedTableTypes, false); LUAU_FASTFLAGVARIABLE(LuauNormalizeBlockedTypes, false); LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution) @@ -202,26 +201,21 @@ bool NormalizedClassType::isNever() const return classes.empty(); } -NormalizedFunctionType::NormalizedFunctionType() - : parts(FFlag::LuauNegatedFunctionTypes ? 
std::optional{TypeIds{}} : std::nullopt) -{ -} - void NormalizedFunctionType::resetToTop() { isTop = true; - parts.emplace(); + parts.clear(); } void NormalizedFunctionType::resetToNever() { isTop = false; - parts.emplace(); + parts.clear(); } bool NormalizedFunctionType::isNever() const { - return !isTop && (!parts || parts->empty()); + return !isTop && parts.empty(); } NormalizedType::NormalizedType(NotNull builtinTypes) @@ -438,13 +432,10 @@ static bool isNormalizedThread(TypeId ty) static bool areNormalizedFunctions(const NormalizedFunctionType& tys) { - if (tys.parts) + for (TypeId ty : tys.parts) { - for (TypeId ty : *tys.parts) - { - if (!get(ty) && !get(ty)) - return false; - } + if (!get(ty) && !get(ty)) + return false; } return true; } @@ -1170,13 +1161,10 @@ std::optional Normalizer::unionOfFunctions(TypeId here, TypeId there) void Normalizer::unionFunctions(NormalizedFunctionType& heres, const NormalizedFunctionType& theres) { - if (FFlag::LuauNegatedFunctionTypes) - { - if (heres.isTop) - return; - if (theres.isTop) - heres.resetToTop(); - } + if (heres.isTop) + return; + if (theres.isTop) + heres.resetToTop(); if (theres.isNever()) return; @@ -1185,13 +1173,13 @@ void Normalizer::unionFunctions(NormalizedFunctionType& heres, const NormalizedF if (heres.isNever()) { - tmps.insert(theres.parts->begin(), theres.parts->end()); + tmps.insert(theres.parts.begin(), theres.parts.end()); heres.parts = std::move(tmps); return; } - for (TypeId here : *heres.parts) - for (TypeId there : *theres.parts) + for (TypeId here : heres.parts) + for (TypeId there : theres.parts) { if (std::optional fun = unionOfFunctions(here, there)) tmps.insert(*fun); @@ -1213,7 +1201,7 @@ void Normalizer::unionFunctionsWithFunction(NormalizedFunctionType& heres, TypeI } TypeIds tmps; - for (TypeId here : *heres.parts) + for (TypeId here : heres.parts) { if (std::optional fun = unionOfFunctions(here, there)) tmps.insert(*fun); @@ -1420,7 +1408,6 @@ bool Normalizer::unionNormalWithTy(NormalizedType& here, TypeId there, int ignor here.threads = there; else if (ptv->type == PrimitiveType::Function) { - LUAU_ASSERT(FFlag::LuauNegatedFunctionTypes); here.functions.resetToTop(); } else if (ptv->type == PrimitiveType::Table && FFlag::LuauNegatedTableTypes) @@ -1553,15 +1540,12 @@ std::optional Normalizer::negateNormal(const NormalizedType& her * arbitrary function types. Ordinary code can never form these kinds of * types, so we decline to negate them. 
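+ *
+ * Concretely, only the two extremes invert cleanly, mirroring the code
+ * below: negating `never` yields the top `function` type, and negating
+ * the top `function` type yields `never`. Anything in between, such as
+ * the negation of `(number) -> number`, has no normal form, so we
+ * return std::nullopt.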
*/ - if (FFlag::LuauNegatedFunctionTypes) - { - if (here.functions.isNever()) - result.functions.resetToTop(); - else if (here.functions.isTop) - result.functions.resetToNever(); - else - return std::nullopt; - } + if (here.functions.isNever()) + result.functions.resetToTop(); + else if (here.functions.isTop) + result.functions.resetToNever(); + else + return std::nullopt; /* * It is not possible to negate an arbitrary table type, because function @@ -2390,15 +2374,15 @@ void Normalizer::intersectFunctionsWithFunction(NormalizedFunctionType& heres, T heres.isTop = false; - for (auto it = heres.parts->begin(); it != heres.parts->end();) + for (auto it = heres.parts.begin(); it != heres.parts.end();) { TypeId here = *it; if (get(here)) it++; else if (std::optional tmp = intersectionOfFunctions(here, there)) { - heres.parts->erase(it); - heres.parts->insert(*tmp); + heres.parts.erase(it); + heres.parts.insert(*tmp); return; } else @@ -2406,13 +2390,13 @@ void Normalizer::intersectFunctionsWithFunction(NormalizedFunctionType& heres, T } TypeIds tmps; - for (TypeId here : *heres.parts) + for (TypeId here : heres.parts) { if (std::optional tmp = unionSaturatedFunctions(here, there)) tmps.insert(*tmp); } - heres.parts->insert(there); - heres.parts->insert(tmps.begin(), tmps.end()); + heres.parts.insert(there); + heres.parts.insert(tmps.begin(), tmps.end()); } void Normalizer::intersectFunctions(NormalizedFunctionType& heres, const NormalizedFunctionType& theres) @@ -2426,7 +2410,7 @@ void Normalizer::intersectFunctions(NormalizedFunctionType& heres, const Normali } else { - for (TypeId there : *theres.parts) + for (TypeId there : theres.parts) intersectFunctionsWithFunction(heres, there); } } @@ -2621,10 +2605,7 @@ bool Normalizer::intersectNormalWithTy(NormalizedType& here, TypeId there) else if (ptv->type == PrimitiveType::Thread) here.threads = threads; else if (ptv->type == PrimitiveType::Function) - { - LUAU_ASSERT(FFlag::LuauNegatedFunctionTypes); here.functions = std::move(functions); - } else if (ptv->type == PrimitiveType::Table) { LUAU_ASSERT(FFlag::LuauNegatedTableTypes); @@ -2768,16 +2749,16 @@ TypeId Normalizer::typeFromNormal(const NormalizedType& norm) if (!get(norm.errors)) result.push_back(norm.errors); - if (FFlag::LuauNegatedFunctionTypes && norm.functions.isTop) + if (norm.functions.isTop) result.push_back(builtinTypes->functionType); else if (!norm.functions.isNever()) { - if (norm.functions.parts->size() == 1) - result.push_back(*norm.functions.parts->begin()); + if (norm.functions.parts.size() == 1) + result.push_back(*norm.functions.parts.begin()); else { std::vector parts; - parts.insert(parts.end(), norm.functions.parts->begin(), norm.functions.parts->end()); + parts.insert(parts.end(), norm.functions.parts.begin(), norm.functions.parts.end()); result.push_back(arena->addType(IntersectionType{std::move(parts)})); } } diff --git a/Analysis/src/Scope.cpp b/Analysis/src/Scope.cpp index f54ebe2a..2de381be 100644 --- a/Analysis/src/Scope.cpp +++ b/Analysis/src/Scope.cpp @@ -65,7 +65,7 @@ std::optional Scope::lookup(DefId def) const return std::nullopt; } -std::optional Scope::lookupType(const Name& name) +std::optional Scope::lookupType(const Name& name) const { const Scope* scope = this; while (true) @@ -85,7 +85,7 @@ std::optional Scope::lookupType(const Name& name) } } -std::optional Scope::lookupImportedType(const Name& moduleAlias, const Name& name) +std::optional Scope::lookupImportedType(const Name& moduleAlias, const Name& name) const { const Scope* scope = this; 
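+ // Walk up the scope chain; each scope keeps its own map of imported
+ // type bindings keyed by module alias.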
while (scope) @@ -110,7 +110,7 @@ std::optional Scope::lookupImportedType(const Name& moduleAlias, const return std::nullopt; } -std::optional Scope::lookupPack(const Name& name) +std::optional Scope::lookupPack(const Name& name) const { const Scope* scope = this; while (true) diff --git a/Analysis/src/TypeChecker2.cpp b/Analysis/src/TypeChecker2.cpp index ec71a583..c7d30f43 100644 --- a/Analysis/src/TypeChecker2.cpp +++ b/Analysis/src/TypeChecker2.cpp @@ -2075,12 +2075,12 @@ struct TypeChecker2 fetch(builtinTypes->functionType); else if (!norm.functions.isNever()) { - if (norm.functions.parts->size() == 1) - fetch(norm.functions.parts->front()); + if (norm.functions.parts.size() == 1) + fetch(norm.functions.parts.front()); else { std::vector parts; - parts.insert(parts.end(), norm.functions.parts->begin(), norm.functions.parts->end()); + parts.insert(parts.end(), norm.functions.parts.begin(), norm.functions.parts.end()); fetch(testArena.addType(IntersectionType{std::move(parts)})); } } diff --git a/Analysis/src/TypeInfer.cpp b/Analysis/src/TypeInfer.cpp index 48ff6a20..f4781558 100644 --- a/Analysis/src/TypeInfer.cpp +++ b/Analysis/src/TypeInfer.cpp @@ -26,7 +26,6 @@ #include LUAU_FASTFLAGVARIABLE(DebugLuauMagicTypes, false) -LUAU_FASTFLAGVARIABLE(LuauDontExtendUnsealedRValueTables, false) LUAU_FASTINTVARIABLE(LuauTypeInferRecursionLimit, 165) LUAU_FASTINTVARIABLE(LuauTypeInferIterationLimit, 20000) LUAU_FASTINTVARIABLE(LuauTypeInferTypePackLoopLimit, 5000) @@ -38,7 +37,6 @@ LUAU_FASTFLAGVARIABLE(LuauReturnAnyInsteadOfICE, false) // Eventually removed as LUAU_FASTFLAGVARIABLE(DebugLuauSharedSelf, false) LUAU_FASTFLAGVARIABLE(LuauTryhardAnd, false) LUAU_FASTFLAG(LuauInstantiateInSubtyping) -LUAU_FASTFLAGVARIABLE(LuauIntersectionTestForEquality, false) LUAU_FASTFLAG(LuauNegatedClassTypes) LUAU_FASTFLAGVARIABLE(LuauAllowIndexClassParameters, false) LUAU_FASTFLAG(LuauUninhabitedSubAnything2) @@ -228,8 +226,8 @@ GlobalTypes::GlobalTypes(NotNull builtinTypes) globalScope->addBuiltinTypeBinding("never", TypeFun{{}, builtinTypes->neverType}); } -TypeChecker::TypeChecker(const GlobalTypes& globals, ModuleResolver* resolver, NotNull builtinTypes, InternalErrorReporter* iceHandler) - : globals(globals) +TypeChecker::TypeChecker(const ScopePtr& globalScope, ModuleResolver* resolver, NotNull builtinTypes, InternalErrorReporter* iceHandler) + : globalScope(globalScope) , resolver(resolver) , builtinTypes(builtinTypes) , iceHandler(iceHandler) @@ -280,7 +278,7 @@ ModulePtr TypeChecker::checkWithoutRecursionCheck(const SourceModule& module, Mo unifierState.counters.recursionLimit = FInt::LuauTypeInferRecursionLimit; unifierState.counters.iterationLimit = unifierIterationLimit ? 
*unifierIterationLimit : FInt::LuauTypeInferIterationLimit; - ScopePtr parentScope = environmentScope.value_or(globals.globalScope); + ScopePtr parentScope = environmentScope.value_or(globalScope); ScopePtr moduleScope = std::make_shared(parentScope); if (module.cyclic) @@ -1656,7 +1654,7 @@ void TypeChecker::prototype(const ScopePtr& scope, const AstStatTypeAlias& typea } else { - if (globals.globalScope->builtinTypeNames.contains(name)) + if (globalScope->builtinTypeNames.contains(name)) { reportError(typealias.location, DuplicateTypeDefinition{name}); duplicateTypeAliases.insert({typealias.exported, name}); @@ -2690,7 +2688,7 @@ TypeId TypeChecker::checkRelationalOperation( if (get(lhsType) || get(rhsType)) return booleanType; - if (FFlag::LuauIntersectionTestForEquality && isEquality) + if (isEquality) { // Unless either type is free or any, an equality comparison is only // valid when the intersection of the two operands is non-empty. @@ -3261,16 +3259,7 @@ TypeId TypeChecker::checkLValueBinding(const ScopePtr& scope, const AstExprIndex { return it->second.type; } - else if (!FFlag::LuauDontExtendUnsealedRValueTables && (lhsTable->state == TableState::Unsealed || lhsTable->state == TableState::Free)) - { - TypeId theType = freshType(scope); - Property& property = lhsTable->props[name]; - property.type = theType; - property.location = expr.indexLocation; - return theType; - } - else if (FFlag::LuauDontExtendUnsealedRValueTables && - ((ctx == ValueContext::LValue && lhsTable->state == TableState::Unsealed) || lhsTable->state == TableState::Free)) + else if ((ctx == ValueContext::LValue && lhsTable->state == TableState::Unsealed) || lhsTable->state == TableState::Free) { TypeId theType = freshType(scope); Property& property = lhsTable->props[name]; @@ -3391,16 +3380,7 @@ TypeId TypeChecker::checkLValueBinding(const ScopePtr& scope, const AstExprIndex { return it->second.type; } - else if (!FFlag::LuauDontExtendUnsealedRValueTables && (exprTable->state == TableState::Unsealed || exprTable->state == TableState::Free)) - { - TypeId resultType = freshType(scope); - Property& property = exprTable->props[value->value.data]; - property.type = resultType; - property.location = expr.index->location; - return resultType; - } - else if (FFlag::LuauDontExtendUnsealedRValueTables && - ((ctx == ValueContext::LValue && exprTable->state == TableState::Unsealed) || exprTable->state == TableState::Free)) + else if ((ctx == ValueContext::LValue && exprTable->state == TableState::Unsealed) || exprTable->state == TableState::Free) { TypeId resultType = freshType(scope); Property& property = exprTable->props[value->value.data]; @@ -3416,14 +3396,7 @@ TypeId TypeChecker::checkLValueBinding(const ScopePtr& scope, const AstExprIndex unify(indexType, indexer.indexType, scope, expr.index->location); return indexer.indexResultType; } - else if (!FFlag::LuauDontExtendUnsealedRValueTables && (exprTable->state == TableState::Unsealed || exprTable->state == TableState::Free)) - { - TypeId resultType = freshType(exprTable->level); - exprTable->indexer = TableIndexer{anyIfNonstrict(indexType), anyIfNonstrict(resultType)}; - return resultType; - } - else if (FFlag::LuauDontExtendUnsealedRValueTables && - ((ctx == ValueContext::LValue && exprTable->state == TableState::Unsealed) || exprTable->state == TableState::Free)) + else if ((ctx == ValueContext::LValue && exprTable->state == TableState::Unsealed) || exprTable->state == TableState::Free) { TypeId indexerType = freshType(exprTable->level); unify(indexType, 
indexerType, scope, expr.location); @@ -3439,13 +3412,7 @@ TypeId TypeChecker::checkLValueBinding(const ScopePtr& scope, const AstExprIndex * has no indexer, we have no idea if it will work so we just return any * and hope for the best. */ - if (FFlag::LuauDontExtendUnsealedRValueTables) - return anyType; - else - { - TypeId resultType = freshType(scope); - return resultType; - } + return anyType; } } @@ -5997,7 +5964,7 @@ void TypeChecker::resolve(const TypeGuardPredicate& typeguardP, RefinementMap& r if (!typeguardP.isTypeof) return addRefinement(refis, typeguardP.lvalue, errorRecoveryType(scope)); - auto typeFun = globals.globalScope->lookupType(typeguardP.kind); + auto typeFun = globalScope->lookupType(typeguardP.kind); if (!typeFun || !typeFun->typeParams.empty() || !typeFun->typePackParams.empty()) return addRefinement(refis, typeguardP.lvalue, errorRecoveryType(scope)); diff --git a/Analysis/src/Unifier.cpp b/Analysis/src/Unifier.cpp index 5f01a606..b748d115 100644 --- a/Analysis/src/Unifier.cpp +++ b/Analysis/src/Unifier.cpp @@ -21,11 +21,9 @@ LUAU_FASTFLAGVARIABLE(LuauInstantiateInSubtyping, false) LUAU_FASTFLAGVARIABLE(LuauUninhabitedSubAnything2, false) LUAU_FASTFLAGVARIABLE(LuauMaintainScopesInUnifier, false) LUAU_FASTFLAGVARIABLE(LuauTransitiveSubtyping, false) -LUAU_FASTFLAGVARIABLE(LuauTinyUnifyNormalsFix, false) LUAU_FASTFLAG(LuauClassTypeVarsInSubstitution) LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution) LUAU_FASTFLAG(LuauNormalizeBlockedTypes) -LUAU_FASTFLAG(LuauNegatedFunctionTypes) LUAU_FASTFLAG(LuauNegatedClassTypes) LUAU_FASTFLAG(LuauNegatedTableTypes) @@ -615,8 +613,7 @@ void Unifier::tryUnify_(TypeId subTy, TypeId superTy, bool isFunctionCall, bool else if ((log.getMutable(superTy) || log.getMutable(superTy)) && log.getMutable(subTy)) tryUnifySingletons(subTy, superTy); - else if (auto ptv = get(superTy); - FFlag::LuauNegatedFunctionTypes && ptv && ptv->type == PrimitiveType::Function && get(subTy)) + else if (auto ptv = get(superTy); ptv && ptv->type == PrimitiveType::Function && get(subTy)) { // Ok. Do nothing. 
forall functions F, F <: function } @@ -1275,17 +1272,7 @@ void Unifier::tryUnifyNormalizedTypes( Unifier innerState = makeChildUnifier(); - if (FFlag::LuauTinyUnifyNormalsFix) - innerState.tryUnify(subTable, superTable); - else - { - if (get(superTable)) - innerState.tryUnifyWithMetatable(subTable, superTable, /* reversed */ false); - else if (get(subTable)) - innerState.tryUnifyWithMetatable(superTable, subTable, /* reversed */ true); - else - innerState.tryUnifyTables(subTable, superTable); - } + innerState.tryUnify(subTable, superTable); if (innerState.errors.empty()) { @@ -1304,7 +1291,7 @@ void Unifier::tryUnifyNormalizedTypes( { if (superNorm.functions.isNever()) return reportError(location, TypeMismatch{superTy, subTy, reason, error, mismatchContext()}); - for (TypeId superFun : *superNorm.functions.parts) + for (TypeId superFun : superNorm.functions.parts) { Unifier innerState = makeChildUnifier(); const FunctionType* superFtv = get(superFun); @@ -1343,7 +1330,7 @@ TypePackId Unifier::tryApplyOverloadedFunction(TypeId function, const Normalized std::optional result; const FunctionType* firstFun = nullptr; - for (TypeId overload : *overloads.parts) + for (TypeId overload : overloads.parts) { if (const FunctionType* ftv = get(overload)) { diff --git a/Ast/src/Lexer.cpp b/Ast/src/Lexer.cpp index dac3b95b..75b4fe30 100644 --- a/Ast/src/Lexer.cpp +++ b/Ast/src/Lexer.cpp @@ -6,8 +6,6 @@ #include -LUAU_FASTFLAGVARIABLE(LuauFixInterpStringMid, false) - namespace Luau { @@ -642,9 +640,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT } consume(); - Lexeme lexemeOutput(Location(start, position()), FFlag::LuauFixInterpStringMid ? formatType : Lexeme::InterpStringBegin, - &buffer[startOffset], offset - startOffset - 1); - return lexemeOutput; + return Lexeme(Location(start, position()), formatType, &buffer[startOffset], offset - startOffset - 1); } default: diff --git a/CodeGen/include/Luau/AddressA64.h b/CodeGen/include/Luau/AddressA64.h index 2c852046..2796ef70 100644 --- a/CodeGen/include/Luau/AddressA64.h +++ b/CodeGen/include/Luau/AddressA64.h @@ -3,6 +3,8 @@ #include "Luau/RegisterA64.h" +#include + namespace Luau { namespace CodeGen @@ -23,6 +25,10 @@ enum class AddressKindA64 : uint8_t struct AddressA64 { + // This is a little misleading since AddressA64 can encode offsets up to 1023*size where size depends on the load/store size + // For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB + static constexpr size_t kMaxOffset = 1023; + AddressA64(RegisterA64 base, int off = 0) : kind(AddressKindA64::imm) , base(base) @@ -30,7 +36,6 @@ struct AddressA64 , data(off) { LUAU_ASSERT(base.kind == KindA64::x || base == sp); - LUAU_ASSERT(off >= -256 && off < 4096); } AddressA64(RegisterA64 base, RegisterA64 offset) diff --git a/CodeGen/include/Luau/AssemblyBuilderA64.h b/CodeGen/include/Luau/AssemblyBuilderA64.h index 1190e975..0c738712 100644 --- a/CodeGen/include/Luau/AssemblyBuilderA64.h +++ b/CodeGen/include/Luau/AssemblyBuilderA64.h @@ -16,10 +16,15 @@ namespace CodeGen namespace A64 { +enum FeaturesA64 +{ + Feature_JSCVT = 1 << 0, +}; + class AssemblyBuilderA64 { public: - explicit AssemblyBuilderA64(bool logText); + explicit AssemblyBuilderA64(bool logText, unsigned int features = 0); ~AssemblyBuilderA64(); // Moves @@ -42,6 +47,7 @@ public: // Note: some arithmetic instructions also have versions that update flags (ADDS etc) but we aren't using them atm void 
cmp(RegisterA64 src1, RegisterA64 src2); void cmp(RegisterA64 src1, uint16_t src2); + void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond); // Bitwise // Note: shifted-register support and bitfield operations are omitted for simplicity @@ -93,6 +99,36 @@ public: // Address of code (label) void adr(RegisterA64 dst, Label& label); + // Floating-point scalar moves + void fmov(RegisterA64 dst, RegisterA64 src); + + // Floating-point scalar math + void fabs(RegisterA64 dst, RegisterA64 src); + void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + void fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + void fneg(RegisterA64 dst, RegisterA64 src); + void fsqrt(RegisterA64 dst, RegisterA64 src); + void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + + // Floating-point rounding and conversions + void frinta(RegisterA64 dst, RegisterA64 src); + void frintm(RegisterA64 dst, RegisterA64 src); + void frintp(RegisterA64 dst, RegisterA64 src); + void fcvtzs(RegisterA64 dst, RegisterA64 src); + void fcvtzu(RegisterA64 dst, RegisterA64 src); + void scvtf(RegisterA64 dst, RegisterA64 src); + void ucvtf(RegisterA64 dst, RegisterA64 src); + + // Floating-point conversion to integer using JS rules (wrap around 2^32) and set Z flag + // note: this is part of ARM8.3 (JSCVT feature); support of this instruction needs to be checked at runtime + void fjcvtzs(RegisterA64 dst, RegisterA64 src); + + // Floating-point comparisons + void fcmp(RegisterA64 src1, RegisterA64 src2); + void fcmpz(RegisterA64 src); + void fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond); + // Run final checks bool finalize(); @@ -121,6 +157,7 @@ public: std::string text; const bool logText = false; + const unsigned int features = 0; // Maximum immediate argument to functions like add/sub/cmp static constexpr size_t kMaxImmediate = (1 << 12) - 1; @@ -134,13 +171,15 @@ private: void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op); void placeI12(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op); void placeI16(const char* name, RegisterA64 dst, int src, uint8_t op, int shift = 0); - void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint8_t op, uint8_t size); + void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint8_t op, uint8_t size, int sizelog); void placeBC(const char* name, Label& label, uint8_t op, uint8_t cond); void placeBCR(const char* name, Label& label, uint8_t op, RegisterA64 cond); void placeBR(const char* name, RegisterA64 src, uint32_t op); void placeADR(const char* name, RegisterA64 src, uint8_t op); void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label); - void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t size); + void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog); + void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc); + void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc); void place(uint32_t word); @@ -164,6 +203,7 @@ private: LUAU_NOINLINE void log(const char* opcode, RegisterA64 src, Label label); LUAU_NOINLINE void log(const char* opcode, RegisterA64 src); LUAU_NOINLINE void log(const char* opcode, Label label); + LUAU_NOINLINE void 
log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond); LUAU_NOINLINE void log(Label label); LUAU_NOINLINE void log(RegisterA64 reg); LUAU_NOINLINE void log(AddressA64 addr); diff --git a/CodeGen/include/Luau/AssemblyBuilderX64.h b/CodeGen/include/Luau/AssemblyBuilderX64.h index 17076ed6..2b2a849c 100644 --- a/CodeGen/include/Luau/AssemblyBuilderX64.h +++ b/CodeGen/include/Luau/AssemblyBuilderX64.h @@ -41,6 +41,7 @@ enum class ABIX64 class AssemblyBuilderX64 { public: + explicit AssemblyBuilderX64(bool logText, ABIX64 abi); explicit AssemblyBuilderX64(bool logText); ~AssemblyBuilderX64(); diff --git a/CodeGen/include/Luau/ConditionA64.h b/CodeGen/include/Luau/ConditionA64.h index 0beadad5..e94adbcf 100644 --- a/CodeGen/include/Luau/ConditionA64.h +++ b/CodeGen/include/Luau/ConditionA64.h @@ -8,28 +8,45 @@ namespace CodeGen namespace A64 { +// See Table C1-1 on page C1-229 of Arm ARM for A-profile architecture enum class ConditionA64 { + // EQ: integer (equal), floating-point (equal) Equal, + // NE: integer (not equal), floating-point (not equal or unordered) NotEqual, + // CS: integer (carry set), floating-point (greater than, equal or unordered) CarrySet, + // CC: integer (carry clear), floating-point (less than) CarryClear, + // MI: integer (negative), floating-point (less than) Minus, + // PL: integer (positive or zero), floating-point (greater than, equal or unordered) Plus, + // VS: integer (overflow), floating-point (unordered) Overflow, + // VC: integer (no overflow), floating-point (ordered) NoOverflow, + // HI: integer (unsigned higher), floating-point (greater than, or unordered) UnsignedGreater, + // LS: integer (unsigned lower or same), floating-point (less than or equal) UnsignedLessEqual, + // GE: integer (signed greater than or equal), floating-point (greater than or equal) GreaterEqual, + // LT: integer (signed less than), floating-point (less than, or unordered) Less, + + // GT: integer (signed greater than), floating-point (greater than) Greater, + // LE: integer (signed less than or equal), floating-point (less than, equal or unordered) LessEqual, + // AL: always Always, Count diff --git a/CodeGen/include/Luau/IrCallWrapperX64.h b/CodeGen/include/Luau/IrCallWrapperX64.h new file mode 100644 index 00000000..b70c8da6 --- /dev/null +++ b/CodeGen/include/Luau/IrCallWrapperX64.h @@ -0,0 +1,82 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#pragma once + +#include "Luau/AssemblyBuilderX64.h" +#include "Luau/IrData.h" +#include "Luau/OperandX64.h" +#include "Luau/RegisterX64.h" + +#include + +// TODO: call wrapper can be used to suggest target registers for ScopedRegX64 to compute data into argument registers directly + +namespace Luau +{ +namespace CodeGen +{ +namespace X64 +{ + +// When IrInst operands are used, current instruction index is required to track lifetime +// In all other calls it is ok to omit the argument +constexpr uint32_t kInvalidInstIdx = ~0u; + +struct IrRegAllocX64; +struct ScopedRegX64; + +struct CallArgument +{ + SizeX64 targetSize = SizeX64::none; + + OperandX64 source = noreg; + IrOp sourceOp; + + OperandX64 target = noreg; + bool candidate = true; +}; + +class IrCallWrapperX64 +{ +public: + IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx = kInvalidInstIdx); + + void addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp = {}); + void addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg); + + 
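// Typical usage (an illustrative sketch, not part of this patch; rState,
+ //   luauRegAddress, and the NativeContext entry are pre-existing helpers):
+ //
+ //   IrCallWrapperX64 callWrap(regs, build, index);
+ //   callWrap.addArgument(SizeX64::qword, rState);
+ //   callWrap.addArgument(SizeX64::qword, luauRegAddress(inst.a), inst.a);
+ //   callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]);
+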
void call(const OperandX64& func); + + IrRegAllocX64& regs; + AssemblyBuilderX64& build; + uint32_t instIdx = ~0u; + +private: + void assignTargetRegisters(); + void countRegisterUses(); + CallArgument* findNonInterferingArgument(); + bool interferesWithOperand(const OperandX64& op, RegisterX64 reg) const; + bool interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const; + bool interferesWithActiveTarget(RegisterX64 sourceReg) const; + void moveToTarget(CallArgument& arg); + void freeSourceRegisters(CallArgument& arg); + void renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement); + void renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement); + RegisterX64 findConflictingTarget() const; + + int getRegisterUses(RegisterX64 reg) const; + void addRegisterUse(RegisterX64 reg); + void removeRegisterUse(RegisterX64 reg); + + static const int kMaxCallArguments = 6; + std::array args; + int argCount = 0; + + OperandX64 funcOp; + + // Internal counters for remaining register use counts + std::array gprUses; + std::array xmmUses; +}; + +} // namespace X64 +} // namespace CodeGen +} // namespace Luau diff --git a/CodeGen/include/Luau/IrData.h b/CodeGen/include/Luau/IrData.h index e8b2bc62..75216081 100644 --- a/CodeGen/include/Luau/IrData.h +++ b/CodeGen/include/Luau/IrData.h @@ -125,6 +125,26 @@ enum class IrCmd : uint8_t // A: double UNM_NUM, + // Round number to negative infinity (math.floor) + // A: double + FLOOR_NUM, + + // Round number to positive infinity (math.ceil) + // A: double + CEIL_NUM, + + // Round number to nearest integer number, rounding half-way cases away from zero (math.round) + // A: double + ROUND_NUM, + + // Get square root of the argument (math.sqrt) + // A: double + SQRT_NUM, + + // Get absolute value of the argument (math.abs) + // A: double + ABS_NUM, + // Compute Luau 'not' operation on destructured TValue // A: tag // B: double @@ -252,6 +272,7 @@ enum class IrCmd : uint8_t // A: Rn (where to store the result) // B: Rn (lhs) // C: Rn or Kn (rhs) + // D: int (TMS enum with arithmetic type) DO_ARITH, // Get length of a TValue of any type @@ -382,54 +403,53 @@ enum class IrCmd : uint8_t // C: Rn (source start) // D: int (count or -1 to assign values up to stack top) // E: unsigned int (table index to start from) - LOP_SETLIST, + SETLIST, // Call specified function // A: Rn (function, followed by arguments) // B: int (argument count or -1 to use all arguments up to stack top) // C: int (result count or -1 to preserve all results and adjust stack top) // Note: return values are placed starting from Rn specified in 'A' - LOP_CALL, + CALL, // Return specified values from the function // A: Rn (value start) // B: int (result count or -1 to return all values up to stack top) - LOP_RETURN, + RETURN, // Adjust loop variables for one iteration of a generic for loop, jump back to the loop header if loop needs to continue // A: Rn (loop variable start, updates Rn+2 and 'B' number of registers starting from Rn+3) // B: int (loop variable count, if more than 2, registers starting from Rn+5 are set to nil) // C: block (repeat) // D: block (exit) - LOP_FORGLOOP, + FORGLOOP, // Handle LOP_FORGLOOP fallback when variable being iterated is not a table - // A: unsigned int (bytecode instruction index) - // B: Rn (loop state start, updates Rn+2 and 'C' number of registers starting from Rn+3) - // C: int (loop variable count and a MSB set when it's an ipairs-like iteration loop) - // D: block (repeat) - // E: block (exit) - 
LOP_FORGLOOP_FALLBACK, + // A: Rn (loop state start, updates Rn+2 and 'B' number of registers starting from Rn+3) + // B: int (loop variable count and a MSB set when it's an ipairs-like iteration loop) + // C: block (repeat) + // D: block (exit) + FORGLOOP_FALLBACK, // Fallback for generic for loop preparation when iterating over builtin pairs/ipairs // It raises an error if 'B' register is not a function // A: unsigned int (bytecode instruction index) // B: Rn // C: block (forgloop location) - LOP_FORGPREP_XNEXT_FALLBACK, + FORGPREP_XNEXT_FALLBACK, // Perform `and` or `or` operation (selecting lhs or rhs based on whether the lhs is truthy) and put the result into target register // A: Rn (target) // B: Rn (lhs) // C: Rn or Kn (rhs) - LOP_AND, - LOP_ANDK, - LOP_OR, - LOP_ORK, + AND, + ANDK, + OR, + ORK, // Increment coverage data (saturating 24 bit add) // A: unsigned int (bytecode instruction index) - LOP_COVERAGE, + COVERAGE, // Operations that have a translation, but use a full instruction fallback @@ -676,6 +696,14 @@ struct IrFunction return instructions[op.index]; } + IrInst* asInstOp(IrOp op) + { + if (op.kind == IrOpKind::Inst) + return &instructions[op.index]; + + return nullptr; + } + IrConst& constOp(IrOp op) { LUAU_ASSERT(op.kind == IrOpKind::Constant); diff --git a/CodeGen/src/IrRegAllocX64.h b/CodeGen/include/Luau/IrRegAllocX64.h similarity index 85% rename from CodeGen/src/IrRegAllocX64.h rename to CodeGen/include/Luau/IrRegAllocX64.h index 497bb035..c2486faf 100644 --- a/CodeGen/src/IrRegAllocX64.h +++ b/CodeGen/include/Luau/IrRegAllocX64.h @@ -24,12 +24,17 @@ struct IrRegAllocX64 RegisterX64 allocGprRegOrReuse(SizeX64 preferredSize, uint32_t index, std::initializer_list oprefs); RegisterX64 allocXmmRegOrReuse(uint32_t index, std::initializer_list oprefs); - RegisterX64 takeGprReg(RegisterX64 reg); + RegisterX64 takeReg(RegisterX64 reg); void freeReg(RegisterX64 reg); void freeLastUseReg(IrInst& target, uint32_t index); void freeLastUseRegs(const IrInst& inst, uint32_t index); + bool isLastUseReg(const IrInst& target, uint32_t index) const; + + bool shouldFreeGpr(RegisterX64 reg) const; + + void assertFree(RegisterX64 reg) const; void assertAllFree() const; IrFunction& function; @@ -51,6 +56,8 @@ struct ScopedRegX64 void alloc(SizeX64 size); void free(); + RegisterX64 release(); + IrRegAllocX64& owner; RegisterX64 reg; }; diff --git a/CodeGen/include/Luau/IrUtils.h b/CodeGen/include/Luau/IrUtils.h index 0fc14025..6e73e47a 100644 --- a/CodeGen/include/Luau/IrUtils.h +++ b/CodeGen/include/Luau/IrUtils.h @@ -99,10 +99,10 @@ inline bool isBlockTerminator(IrCmd cmd) case IrCmd::JUMP_CMP_NUM: case IrCmd::JUMP_CMP_ANY: case IrCmd::JUMP_SLOT_MATCH: - case IrCmd::LOP_RETURN: - case IrCmd::LOP_FORGLOOP: - case IrCmd::LOP_FORGLOOP_FALLBACK: - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: + case IrCmd::RETURN: + case IrCmd::FORGLOOP: + case IrCmd::FORGLOOP_FALLBACK: + case IrCmd::FORGPREP_XNEXT_FALLBACK: case IrCmd::FALLBACK_FORGPREP: return true; default: @@ -137,6 +137,11 @@ inline bool hasResult(IrCmd cmd) case IrCmd::MIN_NUM: case IrCmd::MAX_NUM: case IrCmd::UNM_NUM: + case IrCmd::FLOOR_NUM: + case IrCmd::CEIL_NUM: + case IrCmd::ROUND_NUM: + case IrCmd::SQRT_NUM: + case IrCmd::ABS_NUM: case IrCmd::NOT_ANY: case IrCmd::TABLE_LEN: case IrCmd::NEW_TABLE: diff --git a/CodeGen/include/Luau/RegisterA64.h b/CodeGen/include/Luau/RegisterA64.h index 519e83fc..242e8b79 100644 --- a/CodeGen/include/Luau/RegisterA64.h +++ b/CodeGen/include/Luau/RegisterA64.h @@ -17,6 +17,8 @@ enum class KindA64 : 
uint8_t
     none,
     w, // 32-bit GPR
     x, // 64-bit GPR
+    d, // 64-bit SIMD&FP scalar
+    q, // 128-bit SIMD&FP vector
 };

 struct RegisterA64
@@ -105,6 +107,72 @@ constexpr RegisterA64 xzr{KindA64::x, 31};

 constexpr RegisterA64 sp{KindA64::none, 31};

+constexpr RegisterA64 d0{KindA64::d, 0};
+constexpr RegisterA64 d1{KindA64::d, 1};
+constexpr RegisterA64 d2{KindA64::d, 2};
+constexpr RegisterA64 d3{KindA64::d, 3};
+constexpr RegisterA64 d4{KindA64::d, 4};
+constexpr RegisterA64 d5{KindA64::d, 5};
+constexpr RegisterA64 d6{KindA64::d, 6};
+constexpr RegisterA64 d7{KindA64::d, 7};
+constexpr RegisterA64 d8{KindA64::d, 8};
+constexpr RegisterA64 d9{KindA64::d, 9};
+constexpr RegisterA64 d10{KindA64::d, 10};
+constexpr RegisterA64 d11{KindA64::d, 11};
+constexpr RegisterA64 d12{KindA64::d, 12};
+constexpr RegisterA64 d13{KindA64::d, 13};
+constexpr RegisterA64 d14{KindA64::d, 14};
+constexpr RegisterA64 d15{KindA64::d, 15};
+constexpr RegisterA64 d16{KindA64::d, 16};
+constexpr RegisterA64 d17{KindA64::d, 17};
+constexpr RegisterA64 d18{KindA64::d, 18};
+constexpr RegisterA64 d19{KindA64::d, 19};
+constexpr RegisterA64 d20{KindA64::d, 20};
+constexpr RegisterA64 d21{KindA64::d, 21};
+constexpr RegisterA64 d22{KindA64::d, 22};
+constexpr RegisterA64 d23{KindA64::d, 23};
+constexpr RegisterA64 d24{KindA64::d, 24};
+constexpr RegisterA64 d25{KindA64::d, 25};
+constexpr RegisterA64 d26{KindA64::d, 26};
+constexpr RegisterA64 d27{KindA64::d, 27};
+constexpr RegisterA64 d28{KindA64::d, 28};
+constexpr RegisterA64 d29{KindA64::d, 29};
+constexpr RegisterA64 d30{KindA64::d, 30};
+constexpr RegisterA64 d31{KindA64::d, 31};
+
+constexpr RegisterA64 q0{KindA64::q, 0};
+constexpr RegisterA64 q1{KindA64::q, 1};
+constexpr RegisterA64 q2{KindA64::q, 2};
+constexpr RegisterA64 q3{KindA64::q, 3};
+constexpr RegisterA64 q4{KindA64::q, 4};
+constexpr RegisterA64 q5{KindA64::q, 5};
+constexpr RegisterA64 q6{KindA64::q, 6};
+constexpr RegisterA64 q7{KindA64::q, 7};
+constexpr RegisterA64 q8{KindA64::q, 8};
+constexpr RegisterA64 q9{KindA64::q, 9};
+constexpr RegisterA64 q10{KindA64::q, 10};
+constexpr RegisterA64 q11{KindA64::q, 11};
+constexpr RegisterA64 q12{KindA64::q, 12};
+constexpr RegisterA64 q13{KindA64::q, 13};
+constexpr RegisterA64 q14{KindA64::q, 14};
+constexpr RegisterA64 q15{KindA64::q, 15};
+constexpr RegisterA64 q16{KindA64::q, 16};
+constexpr RegisterA64 q17{KindA64::q, 17};
+constexpr RegisterA64 q18{KindA64::q, 18};
+constexpr RegisterA64 q19{KindA64::q, 19};
+constexpr RegisterA64 q20{KindA64::q, 20};
+constexpr RegisterA64 q21{KindA64::q, 21};
+constexpr RegisterA64 q22{KindA64::q, 22};
+constexpr RegisterA64 q23{KindA64::q, 23};
+constexpr RegisterA64 q24{KindA64::q, 24};
+constexpr RegisterA64 q25{KindA64::q, 25};
+constexpr RegisterA64 q26{KindA64::q, 26};
+constexpr RegisterA64 q27{KindA64::q, 27};
+constexpr RegisterA64 q28{KindA64::q, 28};
+constexpr RegisterA64 q29{KindA64::q, 29};
+constexpr RegisterA64 q30{KindA64::q, 30};
+constexpr RegisterA64 q31{KindA64::q, 31};
+
 } // namespace A64
 } // namespace CodeGen
 } // namespace Luau
diff --git a/CodeGen/src/AssemblyBuilderA64.cpp b/CodeGen/src/AssemblyBuilderA64.cpp
index bedd2740..a80003e9 100644
--- a/CodeGen/src/AssemblyBuilderA64.cpp
+++ b/CodeGen/src/AssemblyBuilderA64.cpp
@@ -21,8 +21,9 @@ static_assert(sizeof(textForCondition) / sizeof(textForCondition[0]) == size_t(C

 const unsigned kMaxAlign = 32;

-AssemblyBuilderA64::AssemblyBuilderA64(bool logText)
+AssemblyBuilderA64::AssemblyBuilderA64(bool logText, unsigned int features)
     : logText(logText)
+    , features(features)
 {
     data.resize(4096);
     dataPos = data.size(); // data is filled backwards
@@ -39,6 +40,9 @@ AssemblyBuilderA64::~AssemblyBuilderA64()

 void AssemblyBuilderA64::mov(RegisterA64 dst, RegisterA64 src)
 {
+    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x || dst == sp);
+    LUAU_ASSERT(dst.kind == src.kind || (dst.kind == KindA64::x && src == sp) || (dst == sp && src.kind == KindA64::x));
+
     if (dst == sp || src == sp)
         placeR1("mov", dst, src, 0b00'100010'0'000000000000);
     else
@@ -115,6 +119,13 @@ void AssemblyBuilderA64::cmp(RegisterA64 src1, uint16_t src2)
     placeI12("cmp", dst, src1, src2, 0b11'10001);
 }

+void AssemblyBuilderA64::csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond)
+{
+    LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w);
+
+    placeCS("csel", dst, src1, src2, cond, 0b11010'10'0, 0b00);
+}
+
 void AssemblyBuilderA64::and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2)
 {
     placeSR3("and", dst, src1, src2, 0b00'01010);
@@ -157,54 +168,76 @@ void AssemblyBuilderA64::ror(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2

 void AssemblyBuilderA64::clz(RegisterA64 dst, RegisterA64 src)
 {
+    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x);
+    LUAU_ASSERT(dst.kind == src.kind);
+
     placeR1("clz", dst, src, 0b10'11010110'00000'00010'0);
 }

 void AssemblyBuilderA64::rbit(RegisterA64 dst, RegisterA64 src)
 {
+    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x);
+    LUAU_ASSERT(dst.kind == src.kind);
+
     placeR1("rbit", dst, src, 0b10'11010110'00000'0000'00);
 }

 void AssemblyBuilderA64::ldr(RegisterA64 dst, AddressA64 src)
 {
-    LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w);
+    LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w || dst.kind == KindA64::d || dst.kind == KindA64::q);

-    placeA("ldr", dst, src, 0b11100001, 0b10 | uint8_t(dst.kind == KindA64::x));
+    switch (dst.kind)
+    {
+    case KindA64::w:
+        placeA("ldr", dst, src, 0b11100001, 0b10, 2);
+        break;
+    case KindA64::x:
+        placeA("ldr", dst, src, 0b11100001, 0b11, 3);
+        break;
+    case KindA64::d:
+        placeA("ldr", dst, src, 0b11110001, 0b11, 3);
+        break;
+    case KindA64::q:
+        placeA("ldr", dst, src, 0b11110011, 0b00, 4);
+        break;
+    case KindA64::none:
+        LUAU_ASSERT(!"Unexpected register kind");
+    }
 }

 void AssemblyBuilderA64::ldrb(RegisterA64 dst, AddressA64 src)
 {
     LUAU_ASSERT(dst.kind == KindA64::w);

-    placeA("ldrb", dst, src, 0b11100001, 0b00);
+    placeA("ldrb", dst, src, 0b11100001, 0b00, 2);
 }

 void AssemblyBuilderA64::ldrh(RegisterA64 dst, AddressA64 src)
 {
     LUAU_ASSERT(dst.kind == KindA64::w);

-    placeA("ldrh", dst, src, 0b11100001, 0b01);
+    placeA("ldrh", dst, src, 0b11100001, 0b01, 2);
 }

 void AssemblyBuilderA64::ldrsb(RegisterA64 dst, AddressA64 src)
 {
     LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w);

-    placeA("ldrsb", dst, src, 0b11100010 | uint8_t(dst.kind == KindA64::w), 0b00);
+    placeA("ldrsb", dst, src, 0b11100010 | uint8_t(dst.kind == KindA64::w), 0b00, 0);
 }

 void AssemblyBuilderA64::ldrsh(RegisterA64 dst, AddressA64 src)
 {
     LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w);

-    placeA("ldrsh", dst, src, 0b11100010 | uint8_t(dst.kind == KindA64::w), 0b01);
+    placeA("ldrsh", dst, src, 0b11100010 | uint8_t(dst.kind == KindA64::w), 0b01, 1);
 }

 void AssemblyBuilderA64::ldrsw(RegisterA64 dst, AddressA64 src)
 {
     LUAU_ASSERT(dst.kind == KindA64::x);

-    placeA("ldrsw", dst, src, 0b11100010, 0b10);
+    placeA("ldrsw", dst, src, 0b11100010, 0b10, 2);
 }

 void AssemblyBuilderA64::ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src)
@@ -212,28 +245,44 @@ void AssemblyBuilderA64::ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src)
     LUAU_ASSERT(dst1.kind == KindA64::x || dst1.kind == KindA64::w);
     LUAU_ASSERT(dst1.kind == dst2.kind);

-    placeP("ldp", dst1, dst2, src, 0b101'0'010'1, 0b10 | uint8_t(dst1.kind == KindA64::x));
+    placeP("ldp", dst1, dst2, src, 0b101'0'010'1, uint8_t(dst1.kind == KindA64::x) << 1, dst1.kind == KindA64::x ? 3 : 2);
 }

 void AssemblyBuilderA64::str(RegisterA64 src, AddressA64 dst)
 {
-    LUAU_ASSERT(src.kind == KindA64::x || src.kind == KindA64::w);
+    LUAU_ASSERT(src.kind == KindA64::x || src.kind == KindA64::w || src.kind == KindA64::d || src.kind == KindA64::q);

-    placeA("str", src, dst, 0b11100000, 0b10 | uint8_t(src.kind == KindA64::x));
+    switch (src.kind)
+    {
+    case KindA64::w:
+        placeA("str", src, dst, 0b11100000, 0b10, 2);
+        break;
+    case KindA64::x:
+        placeA("str", src, dst, 0b11100000, 0b11, 3);
+        break;
+    case KindA64::d:
+        placeA("str", src, dst, 0b11110000, 0b11, 3);
+        break;
+    case KindA64::q:
+        placeA("str", src, dst, 0b11110010, 0b00, 4);
+        break;
+    case KindA64::none:
+        LUAU_ASSERT(!"Unexpected register kind");
+    }
 }

 void AssemblyBuilderA64::strb(RegisterA64 src, AddressA64 dst)
 {
     LUAU_ASSERT(src.kind == KindA64::w);

-    placeA("strb", src, dst, 0b11100000, 0b00);
+    placeA("strb", src, dst, 0b11100000, 0b00, 2);
 }

 void AssemblyBuilderA64::strh(RegisterA64 src, AddressA64 dst)
 {
     LUAU_ASSERT(src.kind == KindA64::w);

-    placeA("strh", src, dst, 0b11100000, 0b01);
+    placeA("strh", src, dst, 0b11100000, 0b01, 2);
 }

 void AssemblyBuilderA64::stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst)
@@ -241,7 +290,7 @@ void AssemblyBuilderA64::stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst)
     LUAU_ASSERT(src1.kind == KindA64::x || src1.kind == KindA64::w);
     LUAU_ASSERT(src1.kind == src2.kind);

-    placeP("stp", src1, src2, dst, 0b101'0'010'0, 0b10 | uint8_t(src1.kind == KindA64::x));
+    placeP("stp", src1, src2, dst, 0b101'0'010'0, uint8_t(src1.kind == KindA64::x) << 1, src1.kind == KindA64::x ? 3 : 2);
 }

 void AssemblyBuilderA64::b(Label& label)
@@ -318,6 +367,145 @@ void AssemblyBuilderA64::adr(RegisterA64 dst, Label& label)
     placeADR("adr", dst, 0b10000, label);
 }

+void AssemblyBuilderA64::fmov(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d);
+
+    placeR1("fmov", dst, src, 0b000'11110'01'1'0000'00'10000);
+}
+
+void AssemblyBuilderA64::fabs(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d);
+
+    placeR1("fabs", dst, src, 0b000'11110'01'1'0000'01'10000);
+}
+
+void AssemblyBuilderA64::fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src1.kind == KindA64::d && src2.kind == KindA64::d);
+
+    placeR3("fadd", dst, src1, src2, 0b11110'01'1, 0b0010'10);
+}
+
+void AssemblyBuilderA64::fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src1.kind == KindA64::d && src2.kind == KindA64::d);
+
+    placeR3("fdiv", dst, src1, src2, 0b11110'01'1, 0b0001'10);
+}
+
+void AssemblyBuilderA64::fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src1.kind == KindA64::d && src2.kind == KindA64::d);
+
+    placeR3("fmul", dst, src1, src2, 0b11110'01'1, 0b0000'10);
+}
+
+void AssemblyBuilderA64::fneg(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d);
+
+    placeR1("fneg", dst, src, 0b000'11110'01'1'0000'10'10000);
+}
+
+void AssemblyBuilderA64::fsqrt(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d);
+
+    placeR1("fsqrt", dst, src, 0b000'11110'01'1'0000'11'10000);
+}
+
+void AssemblyBuilderA64::fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src1.kind == KindA64::d && src2.kind == KindA64::d);
+
+    placeR3("fsub", dst, src1, src2, 0b11110'01'1, 0b0011'10);
+}
+
+void AssemblyBuilderA64::frinta(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d);
+
+    placeR1("frinta", dst, src, 0b000'11110'01'1'001'100'10000);
+}
+
+void AssemblyBuilderA64::frintm(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d);
+
+    placeR1("frintm", dst, src, 0b000'11110'01'1'001'010'10000);
+}
+
+void AssemblyBuilderA64::frintp(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d);
+
+    placeR1("frintp", dst, src, 0b000'11110'01'1'001'001'10000);
+}
+
+void AssemblyBuilderA64::fcvtzs(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x);
+    LUAU_ASSERT(src.kind == KindA64::d);
+
+    placeR1("fcvtzs", dst, src, 0b000'11110'01'1'11'000'000000);
+}
+
+void AssemblyBuilderA64::fcvtzu(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x);
+    LUAU_ASSERT(src.kind == KindA64::d);
+
+    placeR1("fcvtzu", dst, src, 0b000'11110'01'1'11'001'000000);
+}
+
+void AssemblyBuilderA64::scvtf(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d);
+    LUAU_ASSERT(src.kind == KindA64::w || src.kind == KindA64::x);
+
+    placeR1("scvtf", dst, src, 0b000'11110'01'1'00'010'000000);
+}
+
+void AssemblyBuilderA64::ucvtf(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d);
+    LUAU_ASSERT(src.kind == KindA64::w || src.kind == KindA64::x);
+
+    placeR1("ucvtf", dst, src, 0b000'11110'01'1'00'011'000000);
+}
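// ---------------------------------------------------------------------------
// [Editor's note, not part of the patch] A minimal usage sketch of the scalar
// double-precision emitters added above, assuming only the AssemblyBuilderA64
// API as declared in this diff; register and feature choices are illustrative:
//
//     AssemblyBuilderA64 build(/* logText= */ true, /* features= */ 0);
//     build.fadd(d0, d0, d1); // d0 += d1
//     build.frintm(d0, d0);   // round toward -infinity (floor)
//     build.fcvtzs(w0, d0);   // truncate to a signed 32-bit integer
//     build.finalize();
//
// Each emitter asserts its operand kinds up front (KindA64::d here), so a
// kind mismatch fails at emission time instead of producing a bad encoding.
// ---------------------------------------------------------------------------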
+
+void AssemblyBuilderA64::fjcvtzs(RegisterA64 dst, RegisterA64 src)
+{
+    LUAU_ASSERT(dst.kind == KindA64::w);
+    LUAU_ASSERT(src.kind == KindA64::d);
+    LUAU_ASSERT(features & Feature_JSCVT);
+
+    placeR1("fjcvtzs", dst, src, 0b000'11110'01'1'11'110'000000);
+}
+
+void AssemblyBuilderA64::fcmp(RegisterA64 src1, RegisterA64 src2)
+{
+    LUAU_ASSERT(src1.kind == KindA64::d && src2.kind == KindA64::d);
+
+    placeFCMP("fcmp", src1, src2, 0b11110'01'1, 0b00);
+}
+
+void AssemblyBuilderA64::fcmpz(RegisterA64 src)
+{
+    LUAU_ASSERT(src.kind == KindA64::d);
+
+    placeFCMP("fcmp", src, RegisterA64{src.kind, 0}, 0b11110'01'1, 0b01);
+}
+
+void AssemblyBuilderA64::fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond)
+{
+    LUAU_ASSERT(dst.kind == KindA64::d);
+
+    placeCS("fcsel", dst, src1, src2, cond, 0b11110'01'1, 0b11);
+}
+
 bool AssemblyBuilderA64::finalize()
 {
     code.resize(codePos - code.data());
@@ -429,7 +617,7 @@ void AssemblyBuilderA64::placeR3(const char* name, RegisterA64 dst, RegisterA64
     if (logText)
         log(name, dst, src1, src2);

-    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x);
+    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x || dst.kind == KindA64::d);
     LUAU_ASSERT(dst.kind == src1.kind && dst.kind == src2.kind);

     uint32_t sf = (dst.kind == KindA64::x) ? 0x80000000 : 0;
@@ -443,10 +631,7 @@ void AssemblyBuilderA64::placeR1(const char* name, RegisterA64 dst, RegisterA64
     if (logText)
         log(name, dst, src);

-    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x || dst == sp);
-    LUAU_ASSERT(dst.kind == src.kind || (dst.kind == KindA64::x && src == sp) || (dst == sp && src.kind == KindA64::x));
-
-    uint32_t sf = (dst.kind != KindA64::w) ? 0x80000000 : 0;
+    uint32_t sf = (dst.kind == KindA64::x || src.kind == KindA64::x) ? 0x80000000 : 0;

     place(dst.index | (src.index << 5) | (op << 10) | sf);
     commit();
@@ -482,7 +667,7 @@ void AssemblyBuilderA64::placeI16(const char* name, RegisterA64 dst, int src, ui
     commit();
 }

-void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 src, uint8_t op, uint8_t size)
+void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 src, uint8_t op, uint8_t size, int sizelog)
 {
     if (logText)
         log(name, dst, src);
@@ -490,8 +675,8 @@ void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 sr
     switch (src.kind)
     {
     case AddressKindA64::imm:
-        if (src.data >= 0 && src.data % (1 << size) == 0)
-            place(dst.index | (src.base.index << 5) | ((src.data >> size) << 10) | (op << 22) | (1 << 24) | (size << 30));
+        if (src.data >= 0 && (src.data >> sizelog) < 1024 && (src.data & ((1 << sizelog) - 1)) == 0)
+            place(dst.index | (src.base.index << 5) | ((src.data >> sizelog) << 10) | (op << 22) | (1 << 24) | (size << 30));
         else if (src.data >= -256 && src.data <= 255)
             place(dst.index | (src.base.index << 5) | ((src.data & ((1 << 9) - 1)) << 12) | (op << 22) | (size << 30));
         else
@@ -566,16 +751,45 @@ void AssemblyBuilderA64::placeADR(const char* name, RegisterA64 dst, uint8_t op,
     log(name, dst, label);
 }

-void AssemblyBuilderA64::placeP(const char* name, RegisterA64 src1, RegisterA64 src2, AddressA64 dst, uint8_t op, uint8_t size)
+void AssemblyBuilderA64::placeP(const char* name, RegisterA64 src1, RegisterA64 src2, AddressA64 dst, uint8_t op, uint8_t opc, int sizelog)
 {
     if (logText)
         log(name, src1, src2, dst);

     LUAU_ASSERT(dst.kind == AddressKindA64::imm);
-    LUAU_ASSERT(dst.data >= -128 * (1 << size) && dst.data <= 127 * (1 << size));
-    LUAU_ASSERT(dst.data % (1 << size) == 0);
+    LUAU_ASSERT(dst.data >= -128 * (1 << sizelog) && dst.data <= 127 * (1 << sizelog));
+    LUAU_ASSERT(dst.data % (1 << sizelog) == 0);

-    place(src1.index | (dst.base.index << 5) | (src2.index << 10) | (((dst.data >> size) & 127) << 15) | (op << 22) | (size << 31));
+    place(src1.index | (dst.base.index << 5) | (src2.index << 10) | (((dst.data >> sizelog) & 127) << 15) | (op << 22) | (opc << 30));
+    commit();
+}
+
+void AssemblyBuilderA64::placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc)
+{
+    if (logText)
+        log(name, dst, src1, src2, cond);
+
+    LUAU_ASSERT(dst.kind == src1.kind && dst.kind == src2.kind);
+
+    uint32_t sf = (dst.kind == KindA64::x) ? 0x80000000 : 0;
+
+    place(dst.index | (src1.index << 5) | (opc << 10) | (codeForCondition[int(cond)] << 12) | (src2.index << 16) | (op << 21) | sf);
+    commit();
+}
+
+void AssemblyBuilderA64::placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc)
+{
+    if (logText)
+    {
+        if (opc)
+            log(name, src1, 0);
+        else
+            log(name, src1, src2);
+    }
+
+    LUAU_ASSERT(src1.kind == src2.kind);
+
+    place((opc << 3) | (src1.index << 5) | (0b1000 << 10) | (src2.index << 16) | (op << 21));
     commit();
 }
@@ -747,6 +961,19 @@ void AssemblyBuilderA64::log(const char* opcode, Label label)
     logAppend(" %-12s.L%d\n", opcode, label.id);
 }

+void AssemblyBuilderA64::log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond)
+{
+    logAppend(" %-12s", opcode);
+    log(dst);
+    text.append(",");
+    log(src1);
+    text.append(",");
+    log(src2);
+    text.append(",");
+    text.append(textForCondition[int(cond)] + 2); // skip b.
+    text.append("\n");
+}
+
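// ---------------------------------------------------------------------------
// [Editor's note, not part of the patch] placeA above now receives the access
// width as an explicit log2 ("sizelog") instead of reusing the opcode "size"
// field, since the two no longer coincide for the new q-register forms. Its
// offset-mode selection, restated as a sketch under the same assumptions:
//
//     // Scaled unsigned form (LDR/STR immediate): the byte offset must be
//     // non-negative, aligned to the access width, and small enough after
//     // scaling (this patch caps the scaled offset at 1024).
//     bool canUseScaledOffset(int data, int sizelog)
//     {
//         return data >= 0 && (data >> sizelog) < 1024 && (data & ((1 << sizelog) - 1)) == 0;
//     }
//
// Offsets that fail this check fall back to the unscaled form (LDUR/STUR),
// which takes a signed 9-bit byte offset in [-256, 255]; offsets outside both
// ranges take the path not shown in this hunk.
// ---------------------------------------------------------------------------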
 void AssemblyBuilderA64::log(Label label)
 {
     logAppend(".L%d:\n", label.id);
@@ -770,6 +997,14 @@ void AssemblyBuilderA64::log(RegisterA64 reg)
         logAppend("x%d", reg.index);
         break;

+    case KindA64::d:
+        logAppend("d%d", reg.index);
+        break;
+
+    case KindA64::q:
+        logAppend("q%d", reg.index);
+        break;
+
     case KindA64::none:
         if (reg.index == 31)
             text.append("sp");
diff --git a/CodeGen/src/AssemblyBuilderX64.cpp b/CodeGen/src/AssemblyBuilderX64.cpp
index bf7889b8..0285c2a1 100644
--- a/CodeGen/src/AssemblyBuilderX64.cpp
+++ b/CodeGen/src/AssemblyBuilderX64.cpp
@@ -71,9 +71,9 @@ static ABIX64 getCurrentX64ABI()
 #endif
 }

-AssemblyBuilderX64::AssemblyBuilderX64(bool logText)
+AssemblyBuilderX64::AssemblyBuilderX64(bool logText, ABIX64 abi)
     : logText(logText)
-    , abi(getCurrentX64ABI())
+    , abi(abi)
 {
     data.resize(4096);
     dataPos = data.size(); // data is filled backwards
@@ -83,6 +83,11 @@ AssemblyBuilderX64::AssemblyBuilderX64(bool logText)
     codeEnd = code.data() + code.size();
 }

+AssemblyBuilderX64::AssemblyBuilderX64(bool logText)
+    : AssemblyBuilderX64(logText, getCurrentX64ABI())
+{
+}
+
 AssemblyBuilderX64::~AssemblyBuilderX64()
 {
     LUAU_ASSERT(finalized);
diff --git a/CodeGen/src/CodeGen.cpp b/CodeGen/src/CodeGen.cpp
index 5ef5ba64..b0cc8d9c 100644
--- a/CodeGen/src/CodeGen.cpp
+++ b/CodeGen/src/CodeGen.cpp
@@ -43,6 +43,12 @@
 #endif
 #endif

+#if defined(__aarch64__)
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+#endif
+#endif
+
 LUAU_FASTFLAGVARIABLE(DebugCodegenNoOpt, false)

 namespace Luau
@@ -209,7 +215,7 @@ static void lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
     }
 }

-[[maybe_unused]] static void lowerIr(
+[[maybe_unused]] static bool lowerIr(
     X64::AssemblyBuilderX64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
 {
     constexpr uint32_t kFunctionAlignment = 32;
@@ -221,31 +227,21 @@ static void lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
     X64::IrLoweringX64 lowering(build, helpers, data, ir.function);

     lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
+
+    return true;
 }

-[[maybe_unused]] static void lowerIr(
+[[maybe_unused]] static bool lowerIr(
     A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
 {
-    if (A64::IrLoweringA64::canLower(ir.function))
-    {
-        A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function);
+    if (!A64::IrLoweringA64::canLower(ir.function))
+        return false;

-        lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
-    }
-    else
-    {
-        // TODO: This is only needed while we don't support all IR opcodes
-        // When we can't translate some parts of the function, we instead encode a dummy assembly sequence that hands off control to VM
-        // In the future we could return nullptr from assembleFunction and handle it because there may be other reasons for why we refuse to assemble.
-        Label start = build.setLabel();
+    A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function);

-        build.mov(A64::x0, 1); // finish function in VM
-        build.ldr(A64::x1, A64::mem(A64::rNativeContext, offsetof(NativeContext, gateExit)));
-        build.br(A64::x1);
+    lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);

-        for (int i = 0; i < proto->sizecode; i++)
-            ir.function.bcMapping[i].asmLocation = build.getLabelOffset(start);
-    }
+    return true;
 }
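// ---------------------------------------------------------------------------
// [Editor's note, not part of the patch] With lowerIr now returning false
// instead of emitting the old dummy hand-off sequence, refusal to lower
// propagates upward. Condensed from the hunks that follow (names exactly as
// in this diff):
//
//     if (!lowerIr(build, ir, data, helpers, proto, options))
//         return nullptr; // assembleFunction: refuse to assemble this proto
//
//     // compile(): only successfully lowered protos are kept
//     if (NativeProto* np = assembleFunction(build, *data, helpers, p, {}))
//         results.push_back(np);
// ---------------------------------------------------------------------------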

 template<typename AssemblyBuilder>
@@ -289,7 +285,13 @@ static NativeProto* assembleFunction(AssemblyBuilder& build, NativeState& data,
         constPropInBlockChains(ir);
     }

-    lowerIr(build, ir, data, helpers, proto, options);
+    if (!lowerIr(build, ir, data, helpers, proto, options))
+    {
+        if (build.logText)
+            build.logAppend("; skipping (can't lower)\n\n");
+
+        return nullptr;
+    }

     if (build.logText)
         build.logAppend("\n");
@@ -345,6 +347,22 @@ static void onSetBreakpoint(lua_State* L, Proto* proto, int instruction)
     LUAU_ASSERT(!"native breakpoints are not implemented");
 }

+#if defined(__aarch64__)
+static unsigned int getCpuFeaturesA64()
+{
+    unsigned int result = 0;
+
+#ifdef __APPLE__
+    int jscvt = 0;
+    size_t jscvtLen = sizeof(jscvt);
+    if (sysctlbyname("hw.optional.arm.FEAT_JSCVT", &jscvt, &jscvtLen, nullptr, 0) == 0 && jscvt == 1)
+        result |= A64::Feature_JSCVT;
+#endif
+
+    return result;
+}
+#endif
+
 bool isSupported()
 {
 #if !LUA_CUSTOM_EXECUTION
@@ -374,8 +392,20 @@ bool isSupported()
     return true;
 #elif defined(__aarch64__)
+    if (LUA_EXTRA_SIZE != 1)
+        return false;
+
+    if (sizeof(TValue) != 16)
+        return false;
+
+    if (sizeof(LuaNode) != 32)
+        return false;
+
     // TODO: A64 codegen does not generate correct unwind info at the moment so it requires longjmp instead of C++ exceptions
-    return bool(LUA_USE_LONGJMP);
+    if (!LUA_USE_LONGJMP)
+        return false;
+
+    return true;
 #else
     return false;
 #endif
@@ -447,7 +477,7 @@ void compile(lua_State* L, int idx)
         return;

 #if defined(__aarch64__)
-    A64::AssemblyBuilderA64 build(/* logText= */ false);
+    A64::AssemblyBuilderA64 build(/* logText= */ false, getCpuFeaturesA64());
 #else
     X64::AssemblyBuilderX64 build(/* logText= */ false);
 #endif
@@ -470,10 +500,15 @@ void compile(lua_State* L, int idx)
     // Skip protos that have been compiled during previous invocations of CodeGen::compile
     for (Proto* p : protos)
         if (p && getProtoExecData(p) == nullptr)
-            results.push_back(assembleFunction(build, *data, helpers, p, {}));
+            if (NativeProto* np = assembleFunction(build, *data, helpers, p, {}))
+                results.push_back(np);

     build.finalize();

+    // If no functions were assembled, we don't need to allocate/copy executable pages for helpers
+    if (results.empty())
+        return;
+
     uint8_t* nativeData = nullptr;
     size_t sizeNativeData = 0;
     uint8_t* codeStart = nullptr;
@@ -507,7 +542,7 @@ std::string getAssembly(lua_State* L, int idx, AssemblyOptions options)
     const TValue* func = luaA_toobject(L, idx);

 #if defined(__aarch64__)
-    A64::AssemblyBuilderA64 build(/* logText= */ options.includeAssembly);
+    A64::AssemblyBuilderA64 build(/* logText= */ options.includeAssembly, getCpuFeaturesA64());
 #else
     X64::AssemblyBuilderX64 build(/* logText= */ options.includeAssembly);
 #endif
@@ -527,10 +562,8 @@ std::string getAssembly(lua_State* L, int idx, AssemblyOptions options)

     for (Proto* p : protos)
         if (p)
-        {
-            NativeProto* nativeProto = assembleFunction(build, data, helpers, p, options);
-            destroyNativeProto(nativeProto);
-        }
+            if (NativeProto* np = assembleFunction(build, data, helpers, p, options))
+                destroyNativeProto(np);

     build.finalize();
diff --git a/CodeGen/src/CodeGenA64.cpp b/CodeGen/src/CodeGenA64.cpp
index 028b3327..e7a1e2e2 100644
--- a/CodeGen/src/CodeGenA64.cpp
+++ b/CodeGen/src/CodeGenA64.cpp
@@ -100,6 +100,16 @@ void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers)
         build.logAppend("; exitNoContinueVm\n");
     helpers.exitNoContinueVm = build.setLabel();
     emitExit(build, /* continueInVm */ false);
+
+    if (build.logText)
+        build.logAppend("; reentry\n");
+    helpers.reentry = build.setLabel();
+    emitReentry(build, helpers);
+
+    if (build.logText)
+        build.logAppend("; interrupt\n");
+    helpers.interrupt = build.setLabel();
+    emitInterrupt(build);
 }

 } // namespace A64
diff --git a/CodeGen/src/CodeGenUtils.cpp b/CodeGen/src/CodeGenUtils.cpp
index 26568c30..ae3dbd45 100644
--- a/CodeGen/src/CodeGenUtils.cpp
+++ b/CodeGen/src/CodeGenUtils.cpp
@@ -126,7 +126,89 @@ void callEpilogC(lua_State* L, int nresults, int n)
     L->top = (nresults == LUA_MULTRET) ? res : cip->top;
 }

-const Instruction* returnFallback(lua_State* L, StkId ra, int n)
+// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts/savedpc
+Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults)
+{
+    // slow-path: not a function call
+    if (LUAU_UNLIKELY(!ttisfunction(ra)))
+    {
+        luaV_tryfuncTM(L, ra);
+        argtop++; // __call adds an extra self
+    }
+
+    Closure* ccl = clvalue(ra);
+
+    CallInfo* ci = incr_ci(L);
+    ci->func = ra;
+    ci->base = ra + 1;
+    ci->top = argtop + ccl->stacksize; // note: technically UB since we haven't reallocated the stack yet
+    ci->savedpc = NULL;
+    ci->flags = 0;
+    ci->nresults = nresults;
+
+    L->base = ci->base;
+    L->top = argtop;
+
+    // note: this reallocs stack, but we don't need to VM_PROTECT this
+    // this is because we're going to modify base/savedpc manually anyhow
+    // crucially, we can't use ra/argtop after this line
+    luaD_checkstack(L, ccl->stacksize);
+
+    LUAU_ASSERT(ci->top <= L->stack_last);
+
+    if (!ccl->isC)
+    {
+        Proto* p = ccl->l.p;
+
+        // fill unused parameters with nil
+        StkId argi = L->top;
+        StkId argend = L->base + p->numparams;
+        while (argi < argend)
+            setnilvalue(argi++); // complete missing arguments
+        L->top = p->is_vararg ? argi : ci->top;
+
+        // keep executing new function
+        ci->savedpc = p->code;
+        return ccl;
+    }
+    else
+    {
+        lua_CFunction func = ccl->c.f;
+        int n = func(L);
+
+        // yield
+        if (n < 0)
+            return NULL;
+
+        // ci is our callinfo, cip is our parent
+        CallInfo* ci = L->ci;
+        CallInfo* cip = ci - 1;
+
+        // copy return values into parent stack (but only up to nresults!), fill the rest with nil
+        // note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally
+        StkId res = ci->func;
+        StkId vali = L->top - n;
+        StkId valend = L->top;
+
+        int i;
+        for (i = nresults; i != 0 && vali < valend; i--)
+            setobj2s(L, res++, vali++);
+        while (i-- > 0)
+            setnilvalue(res++);
+
+        // pop the stack frame
+        L->ci = cip;
+        L->base = cip->base;
+        L->top = (nresults == LUA_MULTRET) ? res : cip->top;
+
+        // keep executing current function
+        LUAU_ASSERT(isLua(cip));
+        return clvalue(cip->func);
+    }
+}
+
+// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts
+Closure* returnFallback(lua_State* L, StkId ra, int n)
 {
     // ci is our callinfo, cip is our parent
     CallInfo* ci = L->ci;
@@ -159,8 +241,9 @@ const Instruction* returnFallback(lua_State* L, StkId ra, int n)
         return NULL;
     }

+    // keep executing new function
     LUAU_ASSERT(isLua(cip));
-    return cip->savedpc;
+    return clvalue(cip->func);
 }

 } // namespace CodeGen
diff --git a/CodeGen/src/CodeGenUtils.h b/CodeGen/src/CodeGenUtils.h
index 5d37bfd1..6066a691 100644
--- a/CodeGen/src/CodeGenUtils.h
+++ b/CodeGen/src/CodeGenUtils.h
@@ -16,7 +16,8 @@ void forgPrepXnextFallback(lua_State* L, TValue* ra, int pc);
 Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults);
 void callEpilogC(lua_State* L, int nresults, int n);

-const Instruction* returnFallback(lua_State* L, StkId ra, int n);
+Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults);
+Closure* returnFallback(lua_State* L, StkId ra, int n);

 } // namespace CodeGen
 } // namespace Luau
diff --git a/CodeGen/src/EmitBuiltinsX64.cpp b/CodeGen/src/EmitBuiltinsX64.cpp
index d70b6ed8..2e745cbf 100644
--- a/CodeGen/src/EmitBuiltinsX64.cpp
+++ b/CodeGen/src/EmitBuiltinsX64.cpp
@@ -3,9 +3,10 @@

 #include "Luau/AssemblyBuilderX64.h"
 #include "Luau/Bytecode.h"
+#include "Luau/IrCallWrapperX64.h"
+#include "Luau/IrRegAllocX64.h"

 #include "EmitCommonX64.h"
-#include "IrRegAllocX64.h"
 #include "NativeState.h"

 #include "lstate.h"
@@ -19,40 +20,11 @@ namespace CodeGen
 namespace X64
 {

-void emitBuiltinMathFloor(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
-{
-    ScopedRegX64 tmp{regs, SizeX64::xmmword};
-    build.vroundsd(tmp.reg, tmp.reg, luauRegValue(arg), RoundingModeX64::RoundToNegativeInfinity);
-    build.vmovsd(luauRegValue(ra), tmp.reg);
-}
-
-void emitBuiltinMathCeil(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
-{
-    ScopedRegX64 tmp{regs, SizeX64::xmmword};
-    build.vroundsd(tmp.reg, tmp.reg, luauRegValue(arg), RoundingModeX64::RoundToPositiveInfinity);
-    build.vmovsd(luauRegValue(ra), tmp.reg);
-}
-
-void emitBuiltinMathSqrt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
-{
-    ScopedRegX64 tmp{regs, SizeX64::xmmword};
-    build.vsqrtsd(tmp.reg, tmp.reg, luauRegValue(arg));
-    build.vmovsd(luauRegValue(ra), tmp.reg);
-}
-
-void emitBuiltinMathAbs(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
-{
-    ScopedRegX64 tmp{regs, SizeX64::xmmword};
-    build.vmovsd(tmp.reg, luauRegValue(arg));
-    build.vandpd(tmp.reg, tmp.reg, build.i64(~(1LL << 63)));
-    build.vmovsd(luauRegValue(ra), tmp.reg);
-}
-
 static void emitBuiltinMathSingleArgFunc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg, int32_t offset)
 {
-    regs.assertAllFree();
-    build.vmovsd(xmm0, luauRegValue(arg));
-    build.call(qword[rNativeContext + offset]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
+    callWrap.call(qword[rNativeContext + offset]);

     build.vmovsd(luauRegValue(ra), xmm0);
 }
@@ -64,20 +36,10 @@ void emitBuiltinMathExp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int npar

 void emitBuiltinMathFmod(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
 {
-    regs.assertAllFree();
-    build.vmovsd(xmm0, luauRegValue(arg));
-    build.vmovsd(xmm1, qword[args + offsetof(TValue, value)]);
-    build.call(qword[rNativeContext + offsetof(NativeContext, libm_fmod)]);
-
-    build.vmovsd(luauRegValue(ra), xmm0);
-}
-
-void emitBuiltinMathPow(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
-{
-    regs.assertAllFree();
-    build.vmovsd(xmm0, luauRegValue(arg));
-    build.vmovsd(xmm1, qword[args + offsetof(TValue, value)]);
-    build.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
+    callWrap.addArgument(SizeX64::xmmword, qword[args + offsetof(TValue, value)]);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_fmod)]);

     build.vmovsd(luauRegValue(ra), xmm0);
 }
@@ -129,10 +91,10 @@ void emitBuiltinMathTanh(IrRegAllocX64& regs, AssemblyBuilderX64& build, int npa

 void emitBuiltinMathAtan2(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
 {
-    regs.assertAllFree();
-    build.vmovsd(xmm0, luauRegValue(arg));
-    build.vmovsd(xmm1, qword[args + offsetof(TValue, value)]);
-    build.call(qword[rNativeContext + offsetof(NativeContext, libm_atan2)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
+    callWrap.addArgument(SizeX64::xmmword, qword[args + offsetof(TValue, value)]);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_atan2)]);

     build.vmovsd(luauRegValue(ra), xmm0);
 }
@@ -194,46 +156,23 @@ void emitBuiltinMathLog(IrRegAllocX64& regs, AssemblyBuilderX64& build, int npar

 void emitBuiltinMathLdexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
 {
-    regs.assertAllFree();
-    build.vmovsd(xmm0, luauRegValue(arg));
+    ScopedRegX64 tmp{regs, SizeX64::qword};
+    build.vcvttsd2si(tmp.reg, qword[args + offsetof(TValue, value)]);

-    if (build.abi == ABIX64::Windows)
-        build.vcvttsd2si(rArg2, qword[args + offsetof(TValue, value)]);
-    else
-        build.vcvttsd2si(rArg1, qword[args + offsetof(TValue, value)]);
-
-    build.call(qword[rNativeContext + offsetof(NativeContext, libm_ldexp)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
+    callWrap.addArgument(SizeX64::qword, tmp);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_ldexp)]);

     build.vmovsd(luauRegValue(ra), xmm0);
 }

-void emitBuiltinMathRound(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
-{
-    ScopedRegX64 tmp0{regs, SizeX64::xmmword};
-    ScopedRegX64 tmp1{regs, SizeX64::xmmword};
-    ScopedRegX64 tmp2{regs, SizeX64::xmmword};
-
-    build.vmovsd(tmp0.reg, luauRegValue(arg));
-    build.vandpd(tmp1.reg, tmp0.reg, build.f64x2(-0.0, -0.0));
-    build.vmovsd(tmp2.reg, build.i64(0x3fdfffffffffffff)); // 0.49999999999999994
-    build.vorpd(tmp1.reg, tmp1.reg, tmp2.reg);
-    build.vaddsd(tmp0.reg, tmp0.reg, tmp1.reg);
-    build.vroundsd(tmp0.reg, tmp0.reg, tmp0.reg, RoundingModeX64::RoundToZero);
-
-    build.vmovsd(luauRegValue(ra), tmp0.reg);
-}
-
 void emitBuiltinMathFrexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
 {
-    regs.assertAllFree();
-    build.vmovsd(xmm0, luauRegValue(arg));
-
-    if (build.abi == ABIX64::Windows)
-        build.lea(rArg2, sTemporarySlot);
-    else
-        build.lea(rArg1, sTemporarySlot);
-
-    build.call(qword[rNativeContext + offsetof(NativeContext, libm_frexp)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
+    callWrap.addArgument(SizeX64::qword, sTemporarySlot);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_frexp)]);

     build.vmovsd(luauRegValue(ra), xmm0);

@@ -243,15 +182,10 @@ void emitBuiltinMathFrexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int np

 void emitBuiltinMathModf(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
 {
-    regs.assertAllFree();
-    build.vmovsd(xmm0, luauRegValue(arg));
-
-    if (build.abi == ABIX64::Windows)
-        build.lea(rArg2, sTemporarySlot);
-    else
-        build.lea(rArg1, sTemporarySlot);
-
-    build.call(qword[rNativeContext + offsetof(NativeContext, libm_modf)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
+    callWrap.addArgument(SizeX64::qword, sTemporarySlot);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_modf)]);

     build.vmovsd(xmm1, qword[sTemporarySlot + 0]);
     build.vmovsd(luauRegValue(ra), xmm1);
@@ -301,12 +235,10 @@ void emitBuiltinType(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams

 void emitBuiltinTypeof(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults)
 {
-    regs.assertAllFree();
-
-    build.mov(rArg1, rState);
-    build.lea(rArg2, luauRegAddress(arg));
-
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(arg));
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]);

     build.mov(luauRegValue(ra), rax);
 }
@@ -328,22 +260,18 @@ void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int r
     case LBF_MATH_MIN:
     case LBF_MATH_MAX:
     case LBF_MATH_CLAMP:
+    case LBF_MATH_FLOOR:
+    case LBF_MATH_CEIL:
+    case LBF_MATH_SQRT:
+    case LBF_MATH_POW:
+    case LBF_MATH_ABS:
+    case LBF_MATH_ROUND:
         // These instructions are fully translated to IR
         break;
-    case LBF_MATH_FLOOR:
-        return emitBuiltinMathFloor(regs, build, nparams, ra, arg, argsOp, nresults);
-    case LBF_MATH_CEIL:
-        return emitBuiltinMathCeil(regs, build, nparams, ra, arg, argsOp, nresults);
-    case LBF_MATH_SQRT:
-        return emitBuiltinMathSqrt(regs, build, nparams, ra, arg, argsOp, nresults);
-    case LBF_MATH_ABS:
-        return emitBuiltinMathAbs(regs, build, nparams, ra, arg, argsOp, nresults);
     case LBF_MATH_EXP:
         return emitBuiltinMathExp(regs, build, nparams, ra, arg, argsOp, nresults);
     case LBF_MATH_FMOD:
         return emitBuiltinMathFmod(regs, build, nparams, ra, arg, argsOp, nresults);
-    case LBF_MATH_POW:
-        return emitBuiltinMathPow(regs, build, nparams, ra, arg, argsOp, nresults);
     case LBF_MATH_ASIN:
         return emitBuiltinMathAsin(regs, build, nparams, ra, arg, argsOp, nresults);
     case LBF_MATH_SIN:
@@ -370,8 +298,6 @@ void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int r
         return emitBuiltinMathLog(regs, build, nparams, ra, arg, argsOp, nresults);
     case LBF_MATH_LDEXP:
         return emitBuiltinMathLdexp(regs, build, nparams, ra, arg, argsOp, nresults);
-    case LBF_MATH_ROUND:
-        return emitBuiltinMathRound(regs, build, nparams, ra, arg, argsOp, nresults);
     case LBF_MATH_FREXP:
         return emitBuiltinMathFrexp(regs, build, nparams, ra, arg, argsOp, nresults);
     case LBF_MATH_MODF:
diff --git a/CodeGen/src/EmitCommon.h b/CodeGen/src/EmitCommon.h
index 3c41c271..a71eafd4 100644
--- a/CodeGen/src/EmitCommon.h
+++ b/CodeGen/src/EmitCommon.h
@@ -20,9 +20,16 @@ constexpr unsigned kOffsetOfInstructionC = 3;
 // Leaf functions that are placed in every module to perform common instruction sequences
 struct ModuleHelpers
 {
+    // A64/X64
     Label exitContinueVm;
     Label exitNoContinueVm;
+
+    // X64
     Label continueCallInVm;
+
+    // A64
+    Label reentry;   // x0: closure
+    Label interrupt; // x0: pc offset, x1: return address, x2: interrupt
 };

 } // namespace CodeGen
diff --git a/CodeGen/src/EmitCommonA64.cpp b/CodeGen/src/EmitCommonA64.cpp
index 66810d37..2b4bbaba 100644
--- a/CodeGen/src/EmitCommonA64.cpp
+++ b/CodeGen/src/EmitCommonA64.cpp
@@ -11,6 +11,11 @@ namespace CodeGen
 namespace A64
 {

+void emitUpdateBase(AssemblyBuilderA64& build)
+{
+    build.ldr(rBase, mem(rState, offsetof(lua_State, base)));
+}
+
 void emitExit(AssemblyBuilderA64& build, bool continueInVm)
 {
     build.mov(x0, continueInVm);
@@ -18,56 +23,82 @@ void emitExit(AssemblyBuilderA64& build, bool continueInVm)
     build.br(x1);
 }

-void emitUpdateBase(AssemblyBuilderA64& build)
+void emitInterrupt(AssemblyBuilderA64& build)
 {
-    build.ldr(rBase, mem(rState, offsetof(lua_State, base)));
-}
+    // x0 = pc offset
+    // x1 = return address in native code
+    // x2 = interrupt

-void emitSetSavedPc(AssemblyBuilderA64& build, int pcpos)
-{
-    if (pcpos * sizeof(Instruction) <= AssemblyBuilderA64::kMaxImmediate)
-    {
-        build.add(x0, rCode, uint16_t(pcpos * sizeof(Instruction)));
-    }
-    else
-    {
-        build.mov(x0, pcpos * sizeof(Instruction));
-        build.add(x0, rCode, x0);
-    }
+    // Stash return address in rBase; we need to reload rBase anyway
+    build.mov(rBase, x1);

+    // Update savedpc; required in case interrupt errors
+    build.add(x0, rCode, x0);
     build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
     build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
-}
-
-void emitInterrupt(AssemblyBuilderA64& build, int pcpos)
-{
-    Label skip;
-
-    build.ldr(x2, mem(rState, offsetof(lua_State, global)));
-    build.ldr(x2, mem(x2, offsetof(global_State, cb.interrupt)));
-    build.cbz(x2, skip);
-
-    emitSetSavedPc(build, pcpos + 1); // uses x0/x1

     // Call interrupt
-    // TODO: This code should be outlined so that it can be shared by multiple interruptible instructions
     build.mov(x0, rState);
     build.mov(w1, -1);
     build.blr(x2);

     // Check if we need to exit
+    Label skip;
     build.ldrb(w0, mem(rState, offsetof(lua_State, status)));
     build.cbz(w0, skip);

     // L->ci->savedpc--
-    build.ldr(x0, mem(rState, offsetof(lua_State, ci)));
-    build.ldr(x1, mem(x0, offsetof(CallInfo, savedpc)));
-    build.sub(x1, x1, sizeof(Instruction));
-    build.str(x1, mem(x0, offsetof(CallInfo, savedpc)));
+    // note: recomputing this avoids having to stash x0
+    build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
+    build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc)));
+    build.sub(x0, x0, sizeof(Instruction));
+    build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));

     emitExit(build, /* continueInVm */ false);

     build.setLabel(skip);
+
+    // Return back to caller; rBase has stashed return address
+    build.mov(x0, rBase);
+
+    emitUpdateBase(build); // interrupt may have reallocated stack
+
+    build.br(x0);
+}
+
+void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers)
+{
+    // x0 = closure object to reenter (equal to clvalue(L->ci->func))
+
+    // If the fallback requested an exit, we need to do this right away
+    build.cbz(x0, helpers.exitNoContinueVm);
+
+    emitUpdateBase(build);
+
+    // Need to update state of the current function before we jump away
+    build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto
+
+    build.mov(rClosure, x0);
+    build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
+    build.ldr(rCode, mem(x1, offsetof(Proto, code)));   // proto->code
+
+    // Get instruction index from instruction pointer
+    // To get instruction index from instruction pointer, we need to divide byte offset by 4
+    // But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out
+    build.ldr(x2, mem(rState, offsetof(lua_State, ci)));   // L->ci
+    build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc)));   // L->ci->savedpc
+    build.sub(x2, x2, rCode);
+    build.add(x2, x2, x2); // TODO: this would not be necessary if we supported shifted register offsets in loads
+
+    // We need to check if the new function can be executed natively
+    // TODO: This can be done earlier in the function flow, to reduce the JIT->VM transition penalty
+    build.ldr(x1, mem(x1, offsetofProtoExecData));
+    build.cbz(x1, helpers.exitContinueVm);
+
+    // Get new instruction location and jump to it
+    build.ldr(x1, mem(x1, offsetof(NativeProto, instTargets)));
+    build.ldr(x1, mem(x1, x2));
+    build.br(x1);
 }

 } // namespace A64
diff --git a/CodeGen/src/EmitCommonA64.h b/CodeGen/src/EmitCommonA64.h
index 251f6a35..5ca9c558 100644
--- a/CodeGen/src/EmitCommonA64.h
+++ b/CodeGen/src/EmitCommonA64.h
@@ -11,7 +11,7 @@
 // AArch64 ABI reminder:
 // Arguments: x0-x7, v0-v7
 // Return: x0, v0 (or x8 that points to the address of the resulting structure)
-// Volatile: x9-x14, v16-v31 ("caller-saved", any call may change them)
+// Volatile: x9-x15, v16-v31 ("caller-saved", any call may change them)
 // Non-volatile: x19-x28, v8-v15 ("callee-saved", preserved after calls, only bottom half of SIMD registers is preserved!)
 // Reserved: x16-x18: reserved for linker/platform use; x29: frame pointer (unless omitted); x30: link register; x31: stack pointer
@@ -25,52 +25,27 @@ struct NativeState;
 namespace A64
 {

-// Data that is very common to access is placed in non-volatile registers
+// Data that is very common to access is placed in non-volatile registers:
+// 1. Constant registers (only loaded during codegen entry)
 constexpr RegisterA64 rState = x19;         // lua_State* L
-constexpr RegisterA64 rBase = x20;          // StkId base
-constexpr RegisterA64 rNativeContext = x21; // NativeContext* context
-constexpr RegisterA64 rConstants = x22;     // TValue* k
-constexpr RegisterA64 rClosure = x23;       // Closure* cl
-constexpr RegisterA64 rCode = x24;          // Instruction* code
+constexpr RegisterA64 rNativeContext = x20; // NativeContext* context
+
+// 2. Frame registers (reloaded when call frame changes; rBase is also reloaded after all calls that may reallocate stack)
+constexpr RegisterA64 rConstants = x21; // TValue* k
+constexpr RegisterA64 rClosure = x22;   // Closure* cl
+constexpr RegisterA64 rCode = x23;      // Instruction* code
+constexpr RegisterA64 rBase = x24;      // StkId base

 // Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point
 // See CodeGenA64.cpp for layout
 constexpr unsigned kStackSize = 64; // 8 stashed registers

-inline AddressA64 luauReg(int ri)
-{
-    return mem(rBase, ri * sizeof(TValue));
-}
-
-inline AddressA64 luauRegValue(int ri)
-{
-    return mem(rBase, ri * sizeof(TValue) + offsetof(TValue, value));
-}
-
-inline AddressA64 luauRegTag(int ri)
-{
-    return mem(rBase, ri * sizeof(TValue) + offsetof(TValue, tt));
-}
-
-inline AddressA64 luauConstant(int ki)
-{
-    return mem(rConstants, ki * sizeof(TValue));
-}
-
-inline AddressA64 luauConstantTag(int ki)
-{
-    return mem(rConstants, ki * sizeof(TValue) + offsetof(TValue, tt));
-}
-
-inline AddressA64 luauConstantValue(int ki)
-{
-    return mem(rConstants, ki * sizeof(TValue) + offsetof(TValue, value));
-}
-
-void emitExit(AssemblyBuilderA64& build, bool continueInVm);
 void emitUpdateBase(AssemblyBuilderA64& build);
-void emitSetSavedPc(AssemblyBuilderA64& build, int pcpos); // invalidates x0/x1
-void emitInterrupt(AssemblyBuilderA64& build, int pcpos);
+
+// TODO: Move these to CodeGenA64 so that they can't be accidentally called during lowering
+void emitExit(AssemblyBuilderA64& build, bool continueInVm);
+void emitInterrupt(AssemblyBuilderA64& build);
+void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers);

 } // namespace A64
 } // namespace CodeGen
diff --git a/CodeGen/src/EmitCommonX64.cpp b/CodeGen/src/EmitCommonX64.cpp
index e9cfdc48..7db4068d 100644
--- a/CodeGen/src/EmitCommonX64.cpp
+++ b/CodeGen/src/EmitCommonX64.cpp
@@ -2,7 +2,9 @@
 #include "EmitCommonX64.h"

 #include "Luau/AssemblyBuilderX64.h"
+#include "Luau/IrCallWrapperX64.h"
 #include "Luau/IrData.h"
+#include "Luau/IrRegAllocX64.h"

 #include "CustomExecUtils.h"
 #include "NativeState.h"
@@ -64,18 +66,19 @@ void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs,
     }
 }

-void jumpOnAnyCmpFallback(AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label)
+void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label)
 {
-    build.mov(rArg1, rState);
-    build.lea(rArg2, luauRegAddress(ra));
-    build.lea(rArg3, luauRegAddress(rb));
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));

     if (cond == IrCondition::NotLessEqual || cond == IrCondition::LessEqual)
-        build.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]);
+        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]);
     else if (cond == IrCondition::NotLess || cond == IrCondition::Less)
-        build.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]);
+        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]);
     else if (cond == IrCondition::NotEqual || cond == IrCondition::Equal)
-        build.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]);
+        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]);
     else
         LUAU_ASSERT(!"Unsupported condition");
@@ -119,68 +122,66 @@ void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, Regi
     build.jcc(ConditionX64::NotZero, label);
 }

-void callArithHelper(AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm)
+void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm)
 {
-    if (build.abi == ABIX64::Windows)
-        build.mov(sArg5, tm);
-    else
-        build.mov(rArg5, tm);
-
-    build.mov(rArg1, rState);
-    build.lea(rArg2, luauRegAddress(ra));
-    build.lea(rArg3, luauRegAddress(rb));
-    build.lea(rArg4, c);
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarith)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
+    callWrap.addArgument(SizeX64::qword, c);
+    callWrap.addArgument(SizeX64::dword, tm);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarith)]);

     emitUpdateBase(build);
 }

-void callLengthHelper(AssemblyBuilderX64& build, int ra, int rb)
+void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb)
 {
-    build.mov(rArg1, rState);
-    build.lea(rArg2, luauRegAddress(ra));
-    build.lea(rArg3, luauRegAddress(rb));
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]);

     emitUpdateBase(build);
 }

-void callPrepareForN(AssemblyBuilderX64& build, int limit, int step, int init)
+void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init)
 {
-    build.mov(rArg1, rState);
-    build.lea(rArg2, luauRegAddress(limit));
-    build.lea(rArg3, luauRegAddress(step));
-    build.lea(rArg4, luauRegAddress(init));
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaV_prepareFORN)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(limit));
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(step));
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(init));
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_prepareFORN)]);
 }

-void callGetTable(AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
+void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
 {
-    build.mov(rArg1, rState);
-    build.lea(rArg2, luauRegAddress(rb));
-    build.lea(rArg3, c);
-    build.lea(rArg4, luauRegAddress(ra));
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
+    callWrap.addArgument(SizeX64::qword, c);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]);

     emitUpdateBase(build);
 }

-void callSetTable(AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
+void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
 {
-    build.mov(rArg1, rState);
-    build.lea(rArg2, luauRegAddress(rb));
-    build.lea(rArg3, c);
-    build.lea(rArg4, luauRegAddress(ra));
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
+    callWrap.addArgument(SizeX64::qword, c);
+    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]);

     emitUpdateBase(build);
 }

-// works for luaC_barriertable, luaC_barrierf
-static void callBarrierImpl(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip, int contextOffset)
+void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip)
 {
-    LUAU_ASSERT(tmp != object);
-
     // iscollectable(ra)
     build.cmp(luauRegTag(ra), LUA_TSTRING);
     build.jcc(ConditionX64::Less, skip);
@@ -193,88 +194,52 @@ static void callBarrierImpl(AssemblyBuilderX64& build, RegisterX64 tmp, Register
     build.mov(tmp, luauRegValue(ra));
     build.test(byte[tmp + offsetof(GCheader, marked)], bit2mask(WHITE0BIT, WHITE1BIT));
     build.jcc(ConditionX64::Zero, skip);
-
-    // TODO: even with re-ordering we have a chance of failure, we have a task to fix this in the future
-    if (object == rArg3)
-    {
-        LUAU_ASSERT(tmp != rArg2);
-
-        if (rArg2 != object)
-            build.mov(rArg2, object);
-
-        if (rArg3 != tmp)
-            build.mov(rArg3, tmp);
-    }
-    else
-    {
-        if (rArg3 != tmp)
-            build.mov(rArg3, tmp);
-
-        if (rArg2 != object)
-            build.mov(rArg2, object);
-    }
-
-    build.mov(rArg1, rState);
-    build.call(qword[rNativeContext + contextOffset]);
 }

-void callBarrierTable(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 table, int ra, Label& skip)
+void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra, Label& skip)
 {
-    callBarrierImpl(build, tmp, table, ra, skip, offsetof(NativeContext, luaC_barriertable));
+    ScopedRegX64 tmp{regs, SizeX64::qword};
+    checkObjectBarrierConditions(build, tmp.reg, object, ra, skip);
+
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, object, objectOp);
+    callWrap.addArgument(SizeX64::qword, tmp);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierf)]);
 }

-void callBarrierObject(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip)
-{
-    callBarrierImpl(build, tmp, object, ra, skip, offsetof(NativeContext, luaC_barrierf));
-}
-
-void callBarrierTableFast(AssemblyBuilderX64& build, RegisterX64 table, Label& skip)
+void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp, Label& skip)
 {
     // isblack(obj2gco(t))
     build.test(byte[table + offsetof(GCheader, marked)], bitmask(BLACKBIT));
     build.jcc(ConditionX64::Zero, skip);

-    // Argument setup re-ordered to avoid conflicts with table register
-    if (table != rArg2)
-        build.mov(rArg2, table);
-    build.lea(rArg3, addr[rArg2 + offsetof(Table, gclist)]);
-    build.mov(rArg1, rState);
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]);
+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::qword, table, tableOp);
+    callWrap.addArgument(SizeX64::qword, addr[table + offsetof(Table, gclist)]);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]);
 }

-void callCheckGc(AssemblyBuilderX64& build, int pcpos, bool savepc, Label& skip)
+void callCheckGc(IrRegAllocX64& regs, AssemblyBuilderX64& build, Label& skip)
 {
-    build.mov(rax, qword[rState + offsetof(lua_State, global)]);
-    build.mov(rdx, qword[rax + offsetof(global_State, totalbytes)]);
-    build.cmp(rdx, qword[rax + offsetof(global_State, GCthreshold)]);
-    build.jcc(ConditionX64::Below, skip);
+    {
+        ScopedRegX64 tmp1{regs, SizeX64::qword};
+        ScopedRegX64 tmp2{regs, SizeX64::qword};

-    if (savepc)
-        emitSetSavedPc(build, pcpos + 1);
-
-    build.mov(rArg1, rState);
-    build.mov(dwordReg(rArg2), 1);
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]);
+        build.mov(tmp1.reg, qword[rState + offsetof(lua_State, global)]);
+        build.mov(tmp2.reg, qword[tmp1.reg + offsetof(global_State, totalbytes)]);
+        build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(global_State, GCthreshold)]);
+        build.jcc(ConditionX64::Below, skip);
+    }

+    IrCallWrapperX64 callWrap(regs, build);
+    callWrap.addArgument(SizeX64::qword, rState);
+    callWrap.addArgument(SizeX64::dword, 1);
+    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]);

     emitUpdateBase(build);
 }

-void callGetFastTmOrFallback(AssemblyBuilderX64& build, RegisterX64 table, TMS tm, Label& fallback)
-{
-    build.mov(rArg1, qword[table + offsetof(Table, metatable)]);
-    build.test(rArg1, rArg1);
-    build.jcc(ConditionX64::Zero, fallback); // no metatable
-
-    build.test(byte[rArg1 + offsetof(Table, tmcache)], 1 << tm);
-    build.jcc(ConditionX64::NotZero, fallback); // no tag method
-
-    // rArg1 is already prepared
-    build.mov(rArg2, tm);
-    build.mov(rax, qword[rState + offsetof(lua_State, global)]);
-    build.mov(rArg3, qword[rax + offsetof(global_State, tmname) + tm * sizeof(TString*)]);
-    build.call(qword[rNativeContext + offsetof(NativeContext, luaT_gettm)]);
-}
-
 void emitExit(AssemblyBuilderX64& build, bool continueInVm)
 {
     if (continueInVm)
@@ -317,6 +282,8 @@ void emitInterrupt(AssemblyBuilderX64& build, int pcpos)
     build.mov(dwordReg(rArg2), -1); // function accepts 'int' here and using qword reg would've forced 8 byte constant here
     build.call(r8);

+    emitUpdateBase(build); // interrupt may have reallocated stack
+
     // Check if we need to exit
     build.mov(al, byte[rState + offsetof(lua_State, status)]);
     build.test(al, al);
diff --git a/CodeGen/src/EmitCommonX64.h b/CodeGen/src/EmitCommonX64.h
index 6b676255..85045ad5 100644
--- a/CodeGen/src/EmitCommonX64.h
+++ b/CodeGen/src/EmitCommonX64.h
@@ -27,10 +27,13 @@ namespace CodeGen

 enum class IrCondition : uint8_t;
 struct NativeState;
+struct IrOp;

 namespace X64
 {

+struct IrRegAllocX64;
+
 // Data that is very common to access is placed in non-volatile registers
 constexpr RegisterX64 rState = r15;         // lua_State* L
 constexpr RegisterX64 rBase = r14;          // StkId base
@@ -233,21 +236,20 @@ inline void jumpIfNodeKeyNotInExpectedSlot(AssemblyBuilderX64& build, RegisterX6
 }

 void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label);
-void jumpOnAnyCmpFallback(AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label);
+void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label);

 void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos);
 void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label);

-void callArithHelper(AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm);
-void callLengthHelper(AssemblyBuilderX64& build, int ra, int rb);
-void callPrepareForN(AssemblyBuilderX64& build, int limit, int step, int init);
-void callGetTable(AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
-void callSetTable(AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
-void callBarrierTable(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 table, int ra, Label& skip);
-void callBarrierObject(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip);
-void callBarrierTableFast(AssemblyBuilderX64& build, RegisterX64 table, Label& skip);
-void callCheckGc(AssemblyBuilderX64& build, int pcpos, bool savepc, Label& skip);
-void callGetFastTmOrFallback(AssemblyBuilderX64& build, RegisterX64 table, TMS tm, Label& fallback);
+void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm);
+void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb);
+void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init);
+void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
+void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
+void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip);
+void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra, Label& skip);
+void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp, Label& skip);
+void callCheckGc(IrRegAllocX64& regs, AssemblyBuilderX64& build, Label& skip);

 void emitExit(AssemblyBuilderX64& build, bool continueInVm);
 void emitUpdateBase(AssemblyBuilderX64& build);
diff --git a/CodeGen/src/EmitInstructionA64.cpp b/CodeGen/src/EmitInstructionA64.cpp
index 8289ee2e..400ba77e 100644
--- a/CodeGen/src/EmitInstructionA64.cpp
+++ b/CodeGen/src/EmitInstructionA64.cpp
@@ -23,35 +23,50 @@ void emitInstReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, i
     build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, returnFallback)));
     build.blr(x3);

+    // reentry with x0=closure (NULL will trigger exit)
+    build.b(helpers.reentry);
+}
+
+void emitInstCall(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults)
+{
+    // argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams;
+    if (nparams == LUA_MULTRET)
+        build.ldr(x2, mem(rState, offsetof(lua_State, top)));
+    else
+        build.add(x2, rBase, uint16_t((ra + 1 + nparams) * sizeof(TValue)));
+
+    // callFallback(L, ra, argtop, nresults)
+    build.mov(x0, rState);
+    build.add(x1, rBase, uint16_t(ra * sizeof(TValue)));
+    build.mov(w3, nresults);
+    build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback)));
+    build.blr(x4);
+
+    // reentry with x0=closure (NULL will trigger exit)
+    build.b(helpers.reentry);
+}
+
+void emitInstGetImport(AssemblyBuilderA64& build, int ra, uint32_t aux)
+{
+    // luaV_getimport(L, cl->env, k, aux, /* propagatenil= */ false)
+    build.mov(x0, rState);
+    build.ldr(x1, mem(rClosure, offsetof(Closure, env)));
+    build.mov(x2, rConstants);
+    build.mov(w3, aux);
+    build.mov(w4, 0);
+    build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_getimport)));
+    build.blr(x5);
+
     emitUpdateBase(build);

-    // If the fallback requested an exit, we need to do this right away
-    build.cbz(x0, helpers.exitNoContinueVm);
+    // setobj2s(L, ra, L->top - 1)
+    build.ldr(x0, mem(rState, offsetof(lua_State, top)));
+    build.sub(x0, x0, sizeof(TValue));
+    build.ldr(q0, x0);
+    build.str(q0, mem(rBase, ra * sizeof(TValue)));

-    // Need to update state of the current function before we jump away
-    build.ldr(x1, mem(rState, offsetof(lua_State, ci)));      // L->ci
-    build.ldr(x1, mem(x1, offsetof(CallInfo, func)));         // L->ci->func
-    build.ldr(rClosure, mem(x1, offsetof(TValue, value.gc))); // L->ci->func->value.gc aka cl
-
-    build.ldr(x1, mem(rClosure, offsetof(Closure, l.p))); // cl->l.p aka proto
-
-    build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
-    build.ldr(rCode, mem(x1, offsetof(Proto, code)));   // proto->code
-
-    // Get instruction index from instruction pointer
-    // To get instruction index from instruction pointer, we need to divide byte offset by 4
-    // But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out
-    build.sub(x2, x0, rCode);
-    build.add(x2, x2, x2); // TODO: this would not be necessary if we supported shifted register offsets in loads
-
-    // We need to check if the new function can be executed natively
-    build.ldr(x1, mem(x1, offsetofProtoExecData));
-    build.cbz(x1, helpers.exitContinueVm);
-
-    // Get new instruction location and jump to it
-    build.ldr(x1, mem(x1, offsetof(NativeProto, instTargets)));
-    build.ldr(x1, mem(x1, x2));
-    build.br(x1);
+    // L->top--
+    build.str(x0, mem(rState, offsetof(lua_State, top)));
 }

 } // namespace A64
diff --git a/CodeGen/src/EmitInstructionA64.h b/CodeGen/src/EmitInstructionA64.h
index 7f15d819..278d8e8e 100644
--- a/CodeGen/src/EmitInstructionA64.h
+++ b/CodeGen/src/EmitInstructionA64.h
@@ -1,6 +1,8 @@
 // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
 #pragma once

+#include <stdint.h>
+
 namespace Luau
 {
 namespace CodeGen
@@ -14,6 +16,8 @@ namespace A64
 class AssemblyBuilderA64;

 void emitInstReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int n);
+void emitInstCall(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults);
+void emitInstGetImport(AssemblyBuilderA64& build, int ra, uint32_t aux);

 } // namespace A64
 } // namespace CodeGen
diff --git a/CodeGen/src/EmitInstructionX64.cpp b/CodeGen/src/EmitInstructionX64.cpp
index 649498f5..b645f9f7 100644
--- a/CodeGen/src/EmitInstructionX64.cpp
+++ b/CodeGen/src/EmitInstructionX64.cpp
@@ -2,6 +2,7 @@
 #include "EmitInstructionX64.h"

 #include "Luau/AssemblyBuilderX64.h"
+#include "Luau/IrRegAllocX64.h"

 #include "CustomExecUtils.h"
 #include "EmitCommonX64.h"
@@ -315,7 +316,7 @@ void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, i
     build.jmp(qword[rdx + rax * 2]);
 }

-void emitInstSetList(AssemblyBuilderX64& build, Label& next, int ra, int rb, int count, uint32_t index)
+void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, Label& next, int ra, int rb, int count, uint32_t index)
 {
     OperandX64 last = index + count - 1;
@@ -346,7 +347,7 @@ void emitInstSetList(AssemblyBuilderX64& build, Label& next, int ra, int rb, int

     Label skipResize;

-    RegisterX64 table = rax;
+    RegisterX64 table = regs.takeReg(rax);

     build.mov(table, luauRegValue(ra));

@@ -411,7 +412,7 @@ void emitInstSetList(AssemblyBuilderX64& build, Label& next, int ra, int rb, int
         build.setLabel(endLoop);
     }

-    callBarrierTableFast(build, table, next);
+    callBarrierTableFast(regs, build, table, {}, next);
 }

 void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat, Label& loopExit)
@@ -483,10 +484,8 @@ void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRep
     build.jcc(ConditionX64::NotZero, loopRepeat);
 }

-void emitinstForGLoopFallback(AssemblyBuilderX64& build, int pcpos, int ra, int aux, Label& loopRepeat)
+void emitinstForGLoopFallback(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat)
 {
-    emitSetSavedPc(build, pcpos + 1);
-
     build.mov(rArg1, rState);
     build.mov(dwordReg(rArg2), ra);
     build.mov(dwordReg(rArg3), aux);
diff --git a/CodeGen/src/EmitInstructionX64.h b/CodeGen/src/EmitInstructionX64.h
index 880c9fa4..cc1b8645 100644
--- a/CodeGen/src/EmitInstructionX64.h
+++ b/CodeGen/src/EmitInstructionX64.h
@@ -15,12 +15,13 @@ namespace X64
 {

 class AssemblyBuilderX64;
+struct IrRegAllocX64;

 void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults);
 void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults);
-void emitInstSetList(AssemblyBuilderX64& build, Label& next, int ra, int rb, int count, uint32_t index);
+void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, Label& next, int ra, int rb, int count, uint32_t index);
 void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat, Label& loopExit);
-void emitinstForGLoopFallback(AssemblyBuilderX64& build, int pcpos, int ra, int aux, Label& loopRepeat);
+void emitinstForGLoopFallback(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat);
 void emitInstForGPrepXnextFallback(AssemblyBuilderX64& build, int pcpos, int ra, Label& target);
 void emitInstAnd(AssemblyBuilderX64& build, int ra, int rb, int rc);
 void emitInstAndK(AssemblyBuilderX64& build, int ra, int rb, int kc);
diff --git a/CodeGen/src/IrAnalysis.cpp b/CodeGen/src/IrAnalysis.cpp
index 6e77dfe4..b248b97d 100644
--- a/CodeGen/src/IrAnalysis.cpp
+++ b/CodeGen/src/IrAnalysis.cpp
@@ -300,17 +300,17 @@ static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock&
         if (function.boolOp(inst.b))
             capturedRegs.set(inst.a.index, true);
         break;
-    case IrCmd::LOP_SETLIST:
+    case IrCmd::SETLIST:
         use(inst.b);
         useRange(inst.c.index, function.intOp(inst.d));
         break;
-    case IrCmd::LOP_CALL:
+    case IrCmd::CALL:
         use(inst.a);
         useRange(inst.a.index + 1, function.intOp(inst.b));
         defRange(inst.a.index, function.intOp(inst.c));
         break;
-    case IrCmd::LOP_RETURN:
+    case IrCmd::RETURN:
         useRange(inst.a.index, function.intOp(inst.b));
         break;
     case IrCmd::FASTCALL:
@@ -341,7 +341,7 @@ static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock&
         if (int count = function.intOp(inst.f); count != -1)
             defRange(inst.b.index, count);
         break;
-    case IrCmd::LOP_FORGLOOP:
+    case IrCmd::FORGLOOP:
         // First register is not used by instruction, we check that it's still 'nil' with CHECK_TAG
         use(inst.a, 1);
         use(inst.a, 2);
@@ -349,26 +349,26 @@ static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock&
         def(inst.a, 2);
         defRange(inst.a.index + 3, function.intOp(inst.b));
         break;
-    case IrCmd::LOP_FORGLOOP_FALLBACK:
-        useRange(inst.b.index, 3);
+    case IrCmd::FORGLOOP_FALLBACK:
+        useRange(inst.a.index, 3);

-        def(inst.b, 2);
-        defRange(inst.b.index + 3, uint8_t(function.intOp(inst.c))); // ignore most significant bit
+        def(inst.a, 2);
+        defRange(inst.a.index + 3, uint8_t(function.intOp(inst.b))); // ignore most significant bit
         break;
-    case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK:
+    case IrCmd::FORGPREP_XNEXT_FALLBACK:
         use(inst.b);
         break;
         // A <- B, C
-    case IrCmd::LOP_AND:
-    case IrCmd::LOP_OR:
+    case IrCmd::AND:
+    case IrCmd::OR:
         use(inst.b);
         use(inst.c);

         def(inst.a);
         break;
         // A <- B
-    case IrCmd::LOP_ANDK:
-    case IrCmd::LOP_ORK:
+    case IrCmd::ANDK:
+    case IrCmd::ORK:
         use(inst.b);

         def(inst.a);
diff --git a/CodeGen/src/IrBuilder.cpp b/CodeGen/src/IrBuilder.cpp
index 239f7a8e..4fee080b 100644
--- a/CodeGen/src/IrBuilder.cpp
+++ b/CodeGen/src/IrBuilder.cpp
@@ -135,7 +135,7 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i)
         inst(IrCmd::INTERRUPT, constUint(i));
         inst(IrCmd::SET_SAVEDPC, constUint(i + 1));

-        inst(IrCmd::LOP_CALL, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1), constInt(LUAU_INSN_C(*pc) - 1));
+        inst(IrCmd::CALL, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1), constInt(LUAU_INSN_C(*pc) - 1));

         if (activeFastcallFallback)
         {
@@ -149,7 +149,7 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i)
     case LOP_RETURN:
         inst(IrCmd::INTERRUPT, constUint(i));

-        inst(IrCmd::LOP_RETURN, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1));
+        inst(IrCmd::RETURN, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1));
         break;
     case LOP_GETTABLE:
         translateInstGetTable(*this, pc, i);
@@ -266,7 +266,7 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i)
         translateInstDupTable(*this, pc, i);
         break;
     case LOP_SETLIST:
-        inst(IrCmd::LOP_SETLIST, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), constInt(LUAU_INSN_C(*pc) - 1), constUint(pc[1]));
+        inst(IrCmd::SETLIST, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), constInt(LUAU_INSN_C(*pc) - 1), constUint(pc[1]));
         break;
     case LOP_GETUPVAL:
         translateInstGetUpval(*this, pc, i);
@@ -347,10 +347,11 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i)
         inst(IrCmd::INTERRUPT, constUint(i));
         loadAndCheckTag(vmReg(ra), LUA_TNIL, fallback);

-        inst(IrCmd::LOP_FORGLOOP, vmReg(ra), constInt(aux), loopRepeat, loopExit);
+        inst(IrCmd::FORGLOOP, vmReg(ra), constInt(aux), loopRepeat, loopExit);

         beginBlock(fallback);
-        inst(IrCmd::LOP_FORGLOOP_FALLBACK, constUint(i), vmReg(ra), constInt(aux), loopRepeat, loopExit);
+        inst(IrCmd::SET_SAVEDPC, constUint(i + 1));
+        inst(IrCmd::FORGLOOP_FALLBACK, vmReg(ra), constInt(aux), loopRepeat, loopExit);

         beginBlock(loopExit);
     }
@@ -363,19 +364,19 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i)
         translateInstForGPrepInext(*this, pc, i);
         break;
    case
LOP_AND: - inst(IrCmd::LOP_AND, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmReg(LUAU_INSN_C(*pc))); + inst(IrCmd::AND, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmReg(LUAU_INSN_C(*pc))); break; case LOP_ANDK: - inst(IrCmd::LOP_ANDK, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmConst(LUAU_INSN_C(*pc))); + inst(IrCmd::ANDK, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmConst(LUAU_INSN_C(*pc))); break; case LOP_OR: - inst(IrCmd::LOP_OR, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmReg(LUAU_INSN_C(*pc))); + inst(IrCmd::OR, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmReg(LUAU_INSN_C(*pc))); break; case LOP_ORK: - inst(IrCmd::LOP_ORK, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmConst(LUAU_INSN_C(*pc))); + inst(IrCmd::ORK, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmConst(LUAU_INSN_C(*pc))); break; case LOP_COVERAGE: - inst(IrCmd::LOP_COVERAGE, constUint(i)); + inst(IrCmd::COVERAGE, constUint(i)); break; case LOP_GETIMPORT: translateInstGetImport(*this, pc, i); diff --git a/CodeGen/src/IrCallWrapperX64.cpp b/CodeGen/src/IrCallWrapperX64.cpp new file mode 100644 index 00000000..4f0c0cf6 --- /dev/null +++ b/CodeGen/src/IrCallWrapperX64.cpp @@ -0,0 +1,400 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#include "Luau/IrCallWrapperX64.h" + +#include "Luau/AssemblyBuilderX64.h" +#include "Luau/IrRegAllocX64.h" + +#include "EmitCommonX64.h" + +namespace Luau +{ +namespace CodeGen +{ +namespace X64 +{ + +static bool sameUnderlyingRegister(RegisterX64 a, RegisterX64 b) +{ + SizeX64 underlyingSizeA = a.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword; + SizeX64 underlyingSizeB = b.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword; + + return underlyingSizeA == underlyingSizeB && a.index == b.index; +} + +IrCallWrapperX64::IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx) + : regs(regs) + , build(build) + , instIdx(instIdx) + , funcOp(noreg) +{ + gprUses.fill(0); + xmmUses.fill(0); +} + +void IrCallWrapperX64::addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp) +{ + // Instruction operands rely on current instruction index for lifetime tracking + LUAU_ASSERT(instIdx != kInvalidInstIdx || sourceOp.kind == IrOpKind::None); + + LUAU_ASSERT(argCount < kMaxCallArguments); + args[argCount++] = {targetSize, source, sourceOp}; +} + +void IrCallWrapperX64::addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg) +{ + LUAU_ASSERT(argCount < kMaxCallArguments); + args[argCount++] = {targetSize, scopedReg.release(), {}}; +} + +void IrCallWrapperX64::call(const OperandX64& func) +{ + funcOp = func; + + assignTargetRegisters(); + + countRegisterUses(); + + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + // If source is the last use of IrInst, clear the register + // Source registers are recorded separately in CallArgument + if (arg.sourceOp.kind != IrOpKind::None) + { + if (IrInst* inst = regs.function.asInstOp(arg.sourceOp)) + { + if (regs.isLastUseReg(*inst, instIdx)) + inst->regX64 = noreg; + } + } + + // Immediate values are stored at the end since they are not interfering and target register can still be used temporarily + if (arg.source.cat == CategoryX64::imm) + { + arg.candidate = false; + } + // Arguments passed through stack can be handled immediately + else if (arg.target.cat == CategoryX64::mem) + { + if (arg.source.cat == CategoryX64::mem) + { + ScopedRegX64 tmp{regs, 
arg.target.memSize}; + + freeSourceRegisters(arg); + + build.mov(tmp.reg, arg.source); + build.mov(arg.target, tmp.reg); + } + else + { + freeSourceRegisters(arg); + + build.mov(arg.target, arg.source); + } + + arg.candidate = false; + } + // Skip arguments that are already in their place + else if (arg.source.cat == CategoryX64::reg && sameUnderlyingRegister(arg.target.base, arg.source.base)) + { + freeSourceRegisters(arg); + + // If target is not used as source in other arguments, prevent register allocator from giving it out + if (getRegisterUses(arg.target.base) == 0) + regs.takeReg(arg.target.base); + else // Otherwise, make sure we won't free it when last source use is completed + addRegisterUse(arg.target.base); + + arg.candidate = false; + } + } + + // Repeat until we run out of arguments to pass + while (true) + { + // Find target argument register that is not an active source + if (CallArgument* candidate = findNonInterferingArgument()) + { + // This section is only for handling register targets + LUAU_ASSERT(candidate->target.cat == CategoryX64::reg); + + freeSourceRegisters(*candidate); + + LUAU_ASSERT(getRegisterUses(candidate->target.base) == 0); + regs.takeReg(candidate->target.base); + + moveToTarget(*candidate); + + candidate->candidate = false; + } + // If all registers cross-interfere (rcx <- rdx, rdx <- rcx), one has to be renamed + else if (RegisterX64 conflict = findConflictingTarget(); conflict != noreg) + { + // Get a fresh register + RegisterX64 freshReg = conflict.size == SizeX64::xmmword ? regs.allocXmmReg() : regs.allocGprReg(conflict.size); + + if (conflict.size == SizeX64::xmmword) + build.vmovsd(freshReg, conflict, conflict); + else + build.mov(freshReg, conflict); + + renameSourceRegisters(conflict, freshReg); + } + else + { + for (int i = 0; i < argCount; ++i) + LUAU_ASSERT(!args[i].candidate); + break; + } + } + + // Handle immediate arguments last + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + if (arg.source.cat == CategoryX64::imm) + { + if (arg.target.cat == CategoryX64::reg) + regs.takeReg(arg.target.base); + + moveToTarget(arg); + } + } + + // Free registers used in the function call + removeRegisterUse(funcOp.base); + removeRegisterUse(funcOp.index); + + // Just before the call is made, argument registers are all marked as free in register allocator + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + if (arg.target.cat == CategoryX64::reg) + regs.freeReg(arg.target.base); + } + + build.call(funcOp); +} + +void IrCallWrapperX64::assignTargetRegisters() +{ + static const std::array kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]}; + static const std::array kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9}; + + const std::array& gprOrder = build.abi == ABIX64::Windows ? 
kWindowsGprOrder : kSystemvGprOrder; + static const std::array kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV + + int gprPos = 0; + int xmmPos = 0; + + for (int i = 0; i < argCount; i++) + { + CallArgument& arg = args[i]; + + if (arg.targetSize == SizeX64::xmmword) + { + LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size()); + arg.target = kXmmOrder[xmmPos++]; + + if (build.abi == ABIX64::Windows) + gprPos++; // On Windows, gpr/xmm register positions move in sync + } + else + { + LUAU_ASSERT(size_t(gprPos) < gprOrder.size()); + arg.target = gprOrder[gprPos++]; + + if (build.abi == ABIX64::Windows) + xmmPos++; // On Windows, gpr/xmm register positions move in sync + + // Keep requested argument size + if (arg.target.cat == CategoryX64::reg) + arg.target.base.size = arg.targetSize; + else if (arg.target.cat == CategoryX64::mem) + arg.target.memSize = arg.targetSize; + } + } +} + +void IrCallWrapperX64::countRegisterUses() +{ + for (int i = 0; i < argCount; ++i) + { + addRegisterUse(args[i].source.base); + addRegisterUse(args[i].source.index); + } + + addRegisterUse(funcOp.base); + addRegisterUse(funcOp.index); +} + +CallArgument* IrCallWrapperX64::findNonInterferingArgument() +{ + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + if (arg.candidate && !interferesWithActiveSources(arg, i) && !interferesWithOperand(funcOp, arg.target.base)) + return &arg; + } + + return nullptr; +} + +bool IrCallWrapperX64::interferesWithOperand(const OperandX64& op, RegisterX64 reg) const +{ + return sameUnderlyingRegister(op.base, reg) || sameUnderlyingRegister(op.index, reg); +} + +bool IrCallWrapperX64::interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const +{ + for (int i = 0; i < argCount; ++i) + { + const CallArgument& arg = args[i]; + + if (arg.candidate && i != targetArgIndex && interferesWithOperand(arg.source, targetArg.target.base)) + return true; + } + + return false; +} + +bool IrCallWrapperX64::interferesWithActiveTarget(RegisterX64 sourceReg) const +{ + for (int i = 0; i < argCount; ++i) + { + const CallArgument& arg = args[i]; + + if (arg.candidate && sameUnderlyingRegister(arg.target.base, sourceReg)) + return true; + } + + return false; +} + +void IrCallWrapperX64::moveToTarget(CallArgument& arg) +{ + if (arg.source.cat == CategoryX64::reg) + { + RegisterX64 source = arg.source.base; + + if (source.size == SizeX64::xmmword) + build.vmovsd(arg.target, source, source); + else + build.mov(arg.target, source); + } + else if (arg.source.cat == CategoryX64::imm) + { + build.mov(arg.target, arg.source); + } + else + { + if (arg.source.memSize == SizeX64::none) + build.lea(arg.target, arg.source); + else if (arg.target.base.size == SizeX64::xmmword && arg.source.memSize == SizeX64::xmmword) + build.vmovups(arg.target, arg.source); + else if (arg.target.base.size == SizeX64::xmmword) + build.vmovsd(arg.target, arg.source); + else + build.mov(arg.target, arg.source); + } +} + +void IrCallWrapperX64::freeSourceRegisters(CallArgument& arg) +{ + removeRegisterUse(arg.source.base); + removeRegisterUse(arg.source.index); +} + +void IrCallWrapperX64::renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement) +{ + if (sameUnderlyingRegister(target, reg)) + { + addRegisterUse(replacement); + removeRegisterUse(target); + + target.index = replacement.index; // Only change index, size is preserved + } +} + +void IrCallWrapperX64::renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement) +{ + 
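
The loop in call() above is an instance of the classic parallel-move problem: every argument must reach its ABI-assigned register even though some targets are still live as sources. A pair like rcx <- rdx, rdx <- rcx admits no safe ordering at all, which is why findConflictingTarget() picks one source to rename into a fresh register first. Below is a minimal scalar sketch of the same strategy over an int "register file"; the names are hypothetical and this is not the wrapper's real data structure.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Toy model of IrCallWrapperX64::call's shuffle: emit non-interfering moves
// first; when only a cycle remains, rename one source into a scratch register.
struct Move { int dst, src; bool done = false; };

static bool dstIsLiveSource(const std::vector<Move>& moves, int dst)
{
    for (const Move& m : moves)
        if (!m.done && m.src == dst)
            return true;
    return false;
}

static void shuffle(std::vector<Move> moves, std::vector<int>& regs, int scratch)
{
    size_t remaining = moves.size();
    while (remaining != 0)
    {
        bool progress = false;
        for (Move& m : moves)
            if (!m.done && !dstIsLiveSource(moves, m.dst))
            {
                regs[m.dst] = regs[m.src]; // target is not an active source: safe
                m.done = true;
                remaining--;
                progress = true;
            }
        if (!progress)
        {
            // All pending moves cross-interfere; break the cycle by renaming,
            // mirroring findConflictingTarget/renameSourceRegisters above.
            Move& m = *std::find_if(moves.begin(), moves.end(), [](const Move& x) { return !x.done; });
            regs[scratch] = regs[m.src];
            for (Move& other : moves)
                if (!other.done && other.src == m.src)
                    other.src = scratch;
        }
    }
}

int main()
{
    std::vector<int> regs = {10, 20, 0};  // regs[2] acts as the scratch register
    shuffle({{0, 1}, {1, 0}}, regs, 2);   // r0 <- r1, r1 <- r0: a pure cycle
    assert(regs[0] == 20 && regs[1] == 10);
}
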
for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + if (arg.candidate) + { + renameRegister(arg.source.base, reg, replacement); + renameRegister(arg.source.index, reg, replacement); + } + } + + renameRegister(funcOp.base, reg, replacement); + renameRegister(funcOp.index, reg, replacement); +} + +RegisterX64 IrCallWrapperX64::findConflictingTarget() const +{ + for (int i = 0; i < argCount; ++i) + { + const CallArgument& arg = args[i]; + + if (arg.candidate) + { + if (interferesWithActiveTarget(arg.source.base)) + return arg.source.base; + + if (interferesWithActiveTarget(arg.source.index)) + return arg.source.index; + } + } + + if (interferesWithActiveTarget(funcOp.base)) + return funcOp.base; + + if (interferesWithActiveTarget(funcOp.index)) + return funcOp.index; + + return noreg; +} + +int IrCallWrapperX64::getRegisterUses(RegisterX64 reg) const +{ + return reg.size == SizeX64::xmmword ? xmmUses[reg.index] : (reg.size != SizeX64::none ? gprUses[reg.index] : 0); +} + +void IrCallWrapperX64::addRegisterUse(RegisterX64 reg) +{ + if (reg.size == SizeX64::xmmword) + xmmUses[reg.index]++; + else if (reg.size != SizeX64::none) + gprUses[reg.index]++; +} + +void IrCallWrapperX64::removeRegisterUse(RegisterX64 reg) +{ + if (reg.size == SizeX64::xmmword) + { + LUAU_ASSERT(xmmUses[reg.index] != 0); + xmmUses[reg.index]--; + + if (xmmUses[reg.index] == 0) // we don't use persistent xmm regs so no need to call shouldFreeRegister + regs.freeReg(reg); + } + else if (reg.size != SizeX64::none) + { + LUAU_ASSERT(gprUses[reg.index] != 0); + gprUses[reg.index]--; + + if (gprUses[reg.index] == 0 && regs.shouldFreeGpr(reg)) + regs.freeReg(reg); + } +} + +} // namespace X64 +} // namespace CodeGen +} // namespace Luau diff --git a/CodeGen/src/IrDump.cpp b/CodeGen/src/IrDump.cpp index 53654d6a..fb56df8c 100644 --- a/CodeGen/src/IrDump.cpp +++ b/CodeGen/src/IrDump.cpp @@ -126,6 +126,16 @@ const char* getCmdName(IrCmd cmd) return "MAX_NUM"; case IrCmd::UNM_NUM: return "UNM_NUM"; + case IrCmd::FLOOR_NUM: + return "FLOOR_NUM"; + case IrCmd::CEIL_NUM: + return "CEIL_NUM"; + case IrCmd::ROUND_NUM: + return "ROUND_NUM"; + case IrCmd::SQRT_NUM: + return "SQRT_NUM"; + case IrCmd::ABS_NUM: + return "ABS_NUM"; case IrCmd::NOT_ANY: return "NOT_ANY"; case IrCmd::JUMP: @@ -216,28 +226,28 @@ const char* getCmdName(IrCmd cmd) return "CLOSE_UPVALS"; case IrCmd::CAPTURE: return "CAPTURE"; - case IrCmd::LOP_SETLIST: - return "LOP_SETLIST"; - case IrCmd::LOP_CALL: - return "LOP_CALL"; - case IrCmd::LOP_RETURN: - return "LOP_RETURN"; - case IrCmd::LOP_FORGLOOP: - return "LOP_FORGLOOP"; - case IrCmd::LOP_FORGLOOP_FALLBACK: - return "LOP_FORGLOOP_FALLBACK"; - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: - return "LOP_FORGPREP_XNEXT_FALLBACK"; - case IrCmd::LOP_AND: - return "LOP_AND"; - case IrCmd::LOP_ANDK: - return "LOP_ANDK"; - case IrCmd::LOP_OR: - return "LOP_OR"; - case IrCmd::LOP_ORK: - return "LOP_ORK"; - case IrCmd::LOP_COVERAGE: - return "LOP_COVERAGE"; + case IrCmd::SETLIST: + return "SETLIST"; + case IrCmd::CALL: + return "CALL"; + case IrCmd::RETURN: + return "RETURN"; + case IrCmd::FORGLOOP: + return "FORGLOOP"; + case IrCmd::FORGLOOP_FALLBACK: + return "FORGLOOP_FALLBACK"; + case IrCmd::FORGPREP_XNEXT_FALLBACK: + return "FORGPREP_XNEXT_FALLBACK"; + case IrCmd::AND: + return "AND"; + case IrCmd::ANDK: + return "ANDK"; + case IrCmd::OR: + return "OR"; + case IrCmd::ORK: + return "ORK"; + case IrCmd::COVERAGE: + return "COVERAGE"; case IrCmd::FALLBACK_GETGLOBAL: return "FALLBACK_GETGLOBAL"; case 
IrCmd::FALLBACK_SETGLOBAL: diff --git a/CodeGen/src/IrLoweringA64.cpp b/CodeGen/src/IrLoweringA64.cpp index ae4bc017..37f38157 100644 --- a/CodeGen/src/IrLoweringA64.cpp +++ b/CodeGen/src/IrLoweringA64.cpp @@ -13,6 +13,9 @@ #include "lstate.h" +// TODO: Eventually this can go away +// #define TRACE + namespace Luau { namespace CodeGen @@ -20,12 +23,67 @@ namespace CodeGen namespace A64 { +#ifdef TRACE +struct LoweringStatsA64 +{ + size_t can; + size_t total; + + ~LoweringStatsA64() + { + if (total) + printf("A64 lowering succeeded for %.1f%% of functions (%d/%d)\n", double(can) / double(total) * 100, int(can), int(total)); + } +} gStatsA64; +#endif + +inline ConditionA64 getConditionFP(IrCondition cond) +{ + switch (cond) + { + case IrCondition::Equal: + return ConditionA64::Equal; + + case IrCondition::NotEqual: + return ConditionA64::NotEqual; + + case IrCondition::Less: + return ConditionA64::Minus; + + case IrCondition::NotLess: + return ConditionA64::Plus; + + case IrCondition::LessEqual: + return ConditionA64::UnsignedLessEqual; + + case IrCondition::NotLessEqual: + return ConditionA64::UnsignedGreater; + + case IrCondition::Greater: + return ConditionA64::Greater; + + case IrCondition::NotGreater: + return ConditionA64::LessEqual; + + case IrCondition::GreaterEqual: + return ConditionA64::GreaterEqual; + + case IrCondition::NotGreaterEqual: + return ConditionA64::Less; + + default: + LUAU_ASSERT(!"Unexpected condition code"); + return ConditionA64::Always; + } +} + IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function) : build(build) , helpers(helpers) , data(data) , proto(proto) , function(function) + , regs(function, {{x0, x15}, {q0, q7}, {q16, q31}}) { // In order to allocate registers during lowering, we need to know where instruction results are last used updateLastUseLocations(function); @@ -34,20 +92,61 @@ IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, // TODO: Eventually this can go away bool IrLoweringA64::canLower(const IrFunction& function) { +#ifdef TRACE + gStatsA64.total++; +#endif + for (const IrInst& inst : function.instructions) { switch (inst.cmd) { case IrCmd::NOP: - case IrCmd::SUBSTITUTE: + case IrCmd::LOAD_TAG: + case IrCmd::LOAD_POINTER: + case IrCmd::LOAD_DOUBLE: + case IrCmd::LOAD_INT: + case IrCmd::LOAD_TVALUE: + case IrCmd::LOAD_NODE_VALUE_TV: + case IrCmd::LOAD_ENV: + case IrCmd::STORE_TAG: + case IrCmd::STORE_POINTER: + case IrCmd::STORE_DOUBLE: + case IrCmd::STORE_INT: + case IrCmd::STORE_TVALUE: + case IrCmd::STORE_NODE_VALUE_TV: + case IrCmd::ADD_NUM: + case IrCmd::SUB_NUM: + case IrCmd::MUL_NUM: + case IrCmd::DIV_NUM: + case IrCmd::MOD_NUM: + case IrCmd::UNM_NUM: + case IrCmd::JUMP: + case IrCmd::JUMP_EQ_TAG: + case IrCmd::JUMP_CMP_NUM: + case IrCmd::JUMP_CMP_ANY: + case IrCmd::DO_ARITH: + case IrCmd::GET_IMPORT: + case IrCmd::GET_UPVALUE: + case IrCmd::CHECK_TAG: + case IrCmd::CHECK_READONLY: + case IrCmd::CHECK_NO_METATABLE: + case IrCmd::CHECK_SAFE_ENV: case IrCmd::INTERRUPT: - case IrCmd::LOP_RETURN: + case IrCmd::SET_SAVEDPC: + case IrCmd::CALL: + case IrCmd::RETURN: + case IrCmd::SUBSTITUTE: continue; + default: return false; } } +#ifdef TRACE + gStatsA64.can++; +#endif + return true; } @@ -55,23 +154,338 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) { switch (inst.cmd) { + case IrCmd::LOAD_TAG: + { + inst.regA64 = regs.allocReg(KindA64::w); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt)); +
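
getConditionFP above bakes in the IEEE-754 rule that comparisons involving NaN are false: on A64, fcmp with an unordered operand sets NZCV to 0011, so Less maps to Minus (N set, never true for NaN) while its complement NotLess maps to Plus, which is taken. A quick scalar check of the semantics the mapping has to preserve (plain C++, nothing Luau-specific):

#include <cassert>
#include <limits>

// Every ordered comparison is false when either operand is NaN, so each
// "Not" condition must be taken on an unordered fcmp (NZCV = 0011 on A64).
int main()
{
    double nan = std::numeric_limits<double>::quiet_NaN();

    assert(!(nan < 1.0));  // Less -> Minus: N == 0 when unordered, not taken
    assert(!(nan <= 1.0)); // LessEqual -> UnsignedLessEqual: C == 1, not taken
    assert(!(nan > 1.0));  // Greater -> Greater: N != V when unordered, not taken
    assert(!(nan == 1.0)); // Equal -> Equal: Z == 0 when unordered, not taken
}
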
build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_POINTER: + { + inst.regA64 = regs.allocReg(KindA64::x); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_DOUBLE: + { + inst.regA64 = regs.allocReg(KindA64::d); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_INT: + { + inst.regA64 = regs.allocReg(KindA64::w); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_TVALUE: + { + inst.regA64 = regs.allocReg(KindA64::q); + AddressA64 addr = tempAddr(inst.a, 0); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_NODE_VALUE_TV: + { + inst.regA64 = regs.allocReg(KindA64::q); + build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(LuaNode, val))); + break; + } + case IrCmd::LOAD_ENV: + inst.regA64 = regs.allocReg(KindA64::x); + build.ldr(inst.regA64, mem(rClosure, offsetof(Closure, env))); + break; + case IrCmd::STORE_TAG: + { + RegisterA64 temp = regs.allocTemp(KindA64::w); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt)); + build.mov(temp, tagOp(inst.b)); + build.str(temp, addr); + break; + } + case IrCmd::STORE_POINTER: + { + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.str(regOp(inst.b), addr); + break; + } + case IrCmd::STORE_DOUBLE: + { + RegisterA64 temp = tempDouble(inst.b); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.str(temp, addr); + break; + } + case IrCmd::STORE_INT: + { + RegisterA64 temp = tempInt(inst.b); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.str(temp, addr); + break; + } + case IrCmd::STORE_TVALUE: + { + AddressA64 addr = tempAddr(inst.a, 0); + build.str(regOp(inst.b), addr); + break; + } + case IrCmd::STORE_NODE_VALUE_TV: + build.str(regOp(inst.b), mem(regOp(inst.a), offsetof(LuaNode, val))); + break; + case IrCmd::ADD_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fadd(inst.regA64, temp1, temp2); + break; + } + case IrCmd::SUB_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fsub(inst.regA64, temp1, temp2); + break; + } + case IrCmd::MUL_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fmul(inst.regA64, temp1, temp2); + break; + } + case IrCmd::DIV_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fdiv(inst.regA64, temp1, temp2); + break; + } + case IrCmd::MOD_NUM: + { + inst.regA64 = regs.allocReg(KindA64::d); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fdiv(inst.regA64, temp1, temp2); + build.frintm(inst.regA64, inst.regA64); + build.fmul(inst.regA64, inst.regA64, temp2); + build.fsub(inst.regA64, temp1, inst.regA64); + break; + } + case IrCmd::UNM_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a}); + RegisterA64 temp = tempDouble(inst.a); + build.fneg(inst.regA64, temp); + break; + } + case IrCmd::JUMP: + jumpOrFallthrough(blockOp(inst.a), next); + break; + case IrCmd::JUMP_EQ_TAG: + if 
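
The MOD_NUM sequence above (fdiv, frintm, fmul, fsub) computes Lua's floored modulo a - floor(a/b)*b rather than C's fmod; frintm rounds toward minus infinity, i.e. floor. A scalar model of what the four instructions compute, assuming the VM's luai_nummod definition:

#include <cassert>
#include <cmath>

// Scalar model of the MOD_NUM lowering: floored modulo, whose result takes
// the sign of the divisor, unlike C's fmod.
double luaMod(double a, double b)
{
    return a - std::floor(a / b) * b; // frintm == round toward -inf == floor
}

int main()
{
    assert(luaMod(5.0, 3.0) == 2.0);
    assert(luaMod(-5.0, 3.0) == 1.0);    // fmod(-5, 3) would be -2
    assert(luaMod(5.0, -3.0) == -1.0);   // sign follows the divisor
    assert(std::fmod(-5.0, 3.0) == -2.0);
}
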
(inst.b.kind == IrOpKind::Constant) + build.cmp(regOp(inst.a), tagOp(inst.b)); + else if (inst.b.kind == IrOpKind::Inst) + build.cmp(regOp(inst.a), regOp(inst.b)); + else + LUAU_ASSERT(!"Unsupported instruction form"); + + if (isFallthroughBlock(blockOp(inst.d), next)) + { + build.b(ConditionA64::Equal, labelOp(inst.c)); + jumpOrFallthrough(blockOp(inst.d), next); + } + else + { + build.b(ConditionA64::NotEqual, labelOp(inst.d)); + jumpOrFallthrough(blockOp(inst.c), next); + } + break; + case IrCmd::JUMP_CMP_NUM: + { + IrCondition cond = conditionOp(inst.c); + + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + + build.fcmp(temp1, temp2); + build.b(getConditionFP(cond), labelOp(inst.d)); + jumpOrFallthrough(blockOp(inst.e), next); + break; + } + case IrCmd::JUMP_CMP_ANY: + { + IrCondition cond = conditionOp(inst.c); + + regs.assertAllFree(); + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); + build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); + + if (cond == IrCondition::NotLessEqual || cond == IrCondition::LessEqual) + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessequal))); + else if (cond == IrCondition::NotLess || cond == IrCondition::Less) + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessthan))); + else if (cond == IrCondition::NotEqual || cond == IrCondition::Equal) + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_equalval))); + else + LUAU_ASSERT(!"Unsupported condition"); + + build.blr(x3); + + emitUpdateBase(build); + + if (cond == IrCondition::NotLessEqual || cond == IrCondition::NotLess || cond == IrCondition::NotEqual) + build.cbz(x0, labelOp(inst.d)); + else + build.cbnz(x0, labelOp(inst.d)); + jumpOrFallthrough(blockOp(inst.e), next); + break; + } + case IrCmd::DO_ARITH: + regs.assertAllFree(); + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); + build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); + + if (inst.c.kind == IrOpKind::VmConst) + { + // TODO: refactor into a common helper + if (vmConstOp(inst.c) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate) + { + build.add(x3, rConstants, uint16_t(vmConstOp(inst.c) * sizeof(TValue))); + } + else + { + build.mov(x3, vmConstOp(inst.c) * sizeof(TValue)); + build.add(x3, rConstants, x3); + } + } + else + build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue))); + + build.mov(w4, TMS(intOp(inst.d))); + build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_doarith))); + build.blr(x5); + + emitUpdateBase(build); + break; + case IrCmd::GET_IMPORT: + regs.assertAllFree(); + emitInstGetImport(build, vmRegOp(inst.a), uintOp(inst.b)); + break; + case IrCmd::GET_UPVALUE: + { + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp2 = regs.allocTemp(KindA64::q); + RegisterA64 temp3 = regs.allocTemp(KindA64::w); + + build.add(temp1, rClosure, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b))); + + // uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value + Label skip; + build.ldr(temp3, mem(temp1, offsetof(TValue, tt))); + build.cmp(temp3, LUA_TUPVAL); + build.b(ConditionA64::NotEqual, skip); + + // UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally) + build.ldr(temp1, mem(temp1, offsetof(TValue, value.gc))); + build.ldr(temp1, mem(temp1, offsetof(UpVal, v))); + + 
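
The GET_UPVALUE lowering here relies on the upvalue representation: uprefs[i] either holds the value inline, or is tagged LUA_TUPVAL and points to an UpVal whose v field points at the current value. Closed upvalues aim v at a TValue embedded in the UpVal itself, which is why v can be dereferenced unconditionally once the tag check passes. A simplified sketch of that layout (illustrative types and a placeholder tag value, not the real Lua headers):

#include <cassert>

constexpr int LUA_TUPVAL = 100; // placeholder tag value for this sketch

struct TValue { int tt; void* value; };

struct UpVal
{
    TValue* v;      // points at a stack slot while open...
    TValue storage; // ...and at this embedded value once closed
};

TValue readUpvalue(const TValue& upref)
{
    if (upref.tt != LUA_TUPVAL)
        return upref; // value stored inline in uprefs[]
    UpVal* uv = static_cast<UpVal*>(upref.value);
    return *uv->v;    // open or closed, v always points at a live TValue
}

int main()
{
    UpVal uv;
    uv.storage = TValue{1, nullptr};
    uv.v = &uv.storage; // closed upvalue: v points into the UpVal itself
    TValue ref{LUA_TUPVAL, &uv};
    assert(readUpvalue(ref).tt == 1);
}
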
build.setLabel(skip); + + build.ldr(temp2, temp1); + build.str(temp2, mem(rBase, vmRegOp(inst.a) * sizeof(TValue))); + break; + } + case IrCmd::CHECK_TAG: + build.cmp(regOp(inst.a), tagOp(inst.b)); + build.b(ConditionA64::NotEqual, labelOp(inst.c)); + break; + case IrCmd::CHECK_READONLY: + { + RegisterA64 temp = regs.allocTemp(KindA64::w); + build.ldrb(temp, mem(regOp(inst.a), offsetof(Table, readonly))); + build.cbnz(temp, labelOp(inst.b)); + break; + } + case IrCmd::CHECK_NO_METATABLE: + { + RegisterA64 temp = regs.allocTemp(KindA64::x); + build.ldr(temp, mem(regOp(inst.a), offsetof(Table, metatable))); + build.cbnz(temp, labelOp(inst.b)); + break; + } + case IrCmd::CHECK_SAFE_ENV: + { + RegisterA64 temp = regs.allocTemp(KindA64::x); + RegisterA64 tempw{KindA64::w, temp.index}; + build.ldr(temp, mem(rClosure, offsetof(Closure, env))); + build.ldrb(tempw, mem(temp, offsetof(Table, safeenv))); + build.cbz(tempw, labelOp(inst.a)); + break; + } case IrCmd::INTERRUPT: { - emitInterrupt(build, uintOp(inst.a)); + unsigned int pcpos = uintOp(inst.a); + regs.assertAllFree(); + + Label skip; + build.ldr(x2, mem(rState, offsetof(lua_State, global))); + build.ldr(x2, mem(x2, offsetof(global_State, cb.interrupt))); + build.cbz(x2, skip); + + // Jump to outlined interrupt handler, it will give back control to x1 + build.mov(x0, (pcpos + 1) * sizeof(Instruction)); + build.adr(x1, skip); + build.b(helpers.interrupt); + + build.setLabel(skip); break; } - case IrCmd::LOP_RETURN: + case IrCmd::SET_SAVEDPC: { + unsigned int pcpos = uintOp(inst.a); + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp2 = regs.allocTemp(KindA64::x); + + // TODO: refactor into a common helper + if (pcpos * sizeof(Instruction) <= AssemblyBuilderA64::kMaxImmediate) + { + build.add(temp1, rCode, uint16_t(pcpos * sizeof(Instruction))); + } + else + { + build.mov(temp1, pcpos * sizeof(Instruction)); + build.add(temp1, rCode, temp1); + } + + build.ldr(temp2, mem(rState, offsetof(lua_State, ci))); + build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc))); + break; + } + case IrCmd::CALL: + regs.assertAllFree(); + emitInstCall(build, helpers, vmRegOp(inst.a), intOp(inst.b), intOp(inst.c)); + break; + case IrCmd::RETURN: + regs.assertAllFree(); emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b)); break; - } default: LUAU_ASSERT(!"Not supported yet"); break; } - // TODO - // regs.freeLastUseRegs(inst, index); + regs.freeLastUseRegs(inst, index); + regs.freeTempRegs(); } bool IrLoweringA64::isFallthroughBlock(IrBlock target, IrBlock next) @@ -85,6 +499,83 @@ void IrLoweringA64::jumpOrFallthrough(IrBlock& target, IrBlock& next) build.b(target.label); } +RegisterA64 IrLoweringA64::tempDouble(IrOp op) +{ + if (op.kind == IrOpKind::Inst) + return regOp(op); + else if (op.kind == IrOpKind::Constant) + { + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp2 = regs.allocTemp(KindA64::d); + build.adr(temp1, doubleOp(op)); + build.ldr(temp2, temp1); + return temp2; + } + else + { + LUAU_ASSERT(!"Unsupported instruction form"); + return noreg; + } +} + +RegisterA64 IrLoweringA64::tempInt(IrOp op) +{ + if (op.kind == IrOpKind::Inst) + return regOp(op); + else if (op.kind == IrOpKind::Constant) + { + RegisterA64 temp = regs.allocTemp(KindA64::w); + build.mov(temp, intOp(op)); + return temp; + } + else + { + LUAU_ASSERT(!"Unsupported instruction form"); + return noreg; + } +} + +AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset) +{ + // This is needed to tighten the bounds checks in the VmConst case 
below + LUAU_ASSERT(offset % 4 == 0); + + if (op.kind == IrOpKind::VmReg) + return mem(rBase, vmRegOp(op) * sizeof(TValue) + offset); + else if (op.kind == IrOpKind::VmConst) + { + size_t constantOffset = vmConstOp(op) * sizeof(TValue) + offset; + + // Note: cumulative offset is guaranteed to be divisible by 4; we can use that to expand the useful range that doesn't require temporaries + if (constantOffset / 4 <= AddressA64::kMaxOffset) + return mem(rConstants, int(constantOffset)); + + RegisterA64 temp = regs.allocTemp(KindA64::x); + + // TODO: refactor into a common helper + if (constantOffset <= AssemblyBuilderA64::kMaxImmediate) + { + build.add(temp, rConstants, uint16_t(constantOffset)); + } + else + { + build.mov(temp, int(constantOffset)); + build.add(temp, rConstants, temp); + } + + return temp; + } + // If we have a register, we assume it's a pointer to TValue + // We might introduce explicit operand types in the future to make this more robust + else if (op.kind == IrOpKind::Inst) + return mem(regOp(op), offset); + else + { + LUAU_ASSERT(!"Unsupported instruction form"); + return noreg; + } +} + RegisterA64 IrLoweringA64::regOp(IrOp op) const { IrInst& inst = function.instOp(op); diff --git a/CodeGen/src/IrLoweringA64.h b/CodeGen/src/IrLoweringA64.h index aa9eba42..f638432f 100644 --- a/CodeGen/src/IrLoweringA64.h +++ b/CodeGen/src/IrLoweringA64.h @@ -4,6 +4,8 @@ #include "Luau/AssemblyBuilderA64.h" #include "Luau/IrData.h" +#include "IrRegAllocA64.h" + #include struct Proto; @@ -31,6 +33,11 @@ struct IrLoweringA64 bool isFallthroughBlock(IrBlock target, IrBlock next); void jumpOrFallthrough(IrBlock& target, IrBlock& next); + // Operand data build helpers + RegisterA64 tempDouble(IrOp op); + RegisterA64 tempInt(IrOp op); + AddressA64 tempAddr(IrOp op, int offset); + // Operand data lookup helpers RegisterA64 regOp(IrOp op) const; @@ -51,8 +58,7 @@ struct IrLoweringA64 IrFunction& function; - // TODO: - // IrRegAllocA64 regs; + IrRegAllocA64 regs; }; } // namespace A64 diff --git a/CodeGen/src/IrLoweringX64.cpp b/CodeGen/src/IrLoweringX64.cpp index 1cc56fe3..8c45f36a 100644 --- a/CodeGen/src/IrLoweringX64.cpp +++ b/CodeGen/src/IrLoweringX64.cpp @@ -4,6 +4,7 @@ #include "Luau/CodeGen.h" #include "Luau/DenseHash.h" #include "Luau/IrAnalysis.h" +#include "Luau/IrCallWrapperX64.h" #include "Luau/IrDump.h" #include "Luau/IrUtils.h" @@ -141,7 +142,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) inst.regX64 = regs.allocGprReg(SizeX64::qword); // Custom bit shift value can only be placed in cl - ScopedRegX64 shiftTmp{regs, regs.takeGprReg(rcx)}; + ScopedRegX64 shiftTmp{regs, regs.takeReg(rcx)}; ScopedRegX64 tmp{regs, SizeX64::qword}; @@ -325,82 +326,11 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::POW_NUM: { - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); - - ScopedRegX64 optLhsTmp{regs}; - RegisterX64 lhs; - - if (inst.a.kind == IrOpKind::Constant) - { - optLhsTmp.alloc(SizeX64::xmmword); - - build.vmovsd(optLhsTmp.reg, memRegDoubleOp(inst.a)); - lhs = optLhsTmp.reg; - } - else - { - lhs = regOp(inst.a); - } - - if (inst.b.kind == IrOpKind::Inst) - { - // TODO: this doesn't happen with current local-only register allocation, but has to be handled in the future - LUAU_ASSERT(regOp(inst.b) != xmm0); - - if (lhs != xmm0) - build.vmovsd(xmm0, lhs, lhs); - - if (regOp(inst.b) != xmm1) - build.vmovsd(xmm1, regOp(inst.b), regOp(inst.b)); - - build.call(qword[rNativeContext + offsetof(NativeContext, 
libm_pow)]); - - if (inst.regX64 != xmm0) - build.vmovsd(inst.regX64, xmm0, xmm0); - } - else if (inst.b.kind == IrOpKind::Constant) - { - double rhs = doubleOp(inst.b); - - if (rhs == 2.0) - { - build.vmulsd(inst.regX64, lhs, lhs); - } - else if (rhs == 0.5) - { - build.vsqrtsd(inst.regX64, lhs, lhs); - } - else if (rhs == 3.0) - { - ScopedRegX64 tmp{regs, SizeX64::xmmword}; - - build.vmulsd(tmp.reg, lhs, lhs); - build.vmulsd(inst.regX64, lhs, tmp.reg); - } - else - { - if (lhs != xmm0) - build.vmovsd(xmm0, xmm0, lhs); - - build.vmovsd(xmm1, build.f64(rhs)); - build.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]); - - if (inst.regX64 != xmm0) - build.vmovsd(inst.regX64, xmm0, xmm0); - } - } - else - { - if (lhs != xmm0) - build.vmovsd(xmm0, lhs, lhs); - - build.vmovsd(xmm1, memRegDoubleOp(inst.b)); - build.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]); - - if (inst.regX64 != xmm0) - build.vmovsd(inst.regX64, xmm0, xmm0); - } - + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::xmmword, memRegDoubleOp(inst.a), inst.a); + callWrap.addArgument(SizeX64::xmmword, memRegDoubleOp(inst.b), inst.b); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]); + inst.regX64 = regs.takeReg(xmm0); break; } case IrCmd::MIN_NUM: @@ -451,6 +381,46 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } + case IrCmd::FLOOR_NUM: + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToNegativeInfinity); + break; + case IrCmd::CEIL_NUM: + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToPositiveInfinity); + break; + case IrCmd::ROUND_NUM: + { + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + ScopedRegX64 tmp1{regs, SizeX64::xmmword}; + ScopedRegX64 tmp2{regs, SizeX64::xmmword}; + + if (inst.a.kind != IrOpKind::Inst || regOp(inst.a) != inst.regX64) + build.vmovsd(inst.regX64, memRegDoubleOp(inst.a)); + + build.vandpd(tmp1.reg, inst.regX64, build.f64x2(-0.0, -0.0)); + build.vmovsd(tmp2.reg, build.i64(0x3fdfffffffffffff)); // 0.49999999999999994 + build.vorpd(tmp1.reg, tmp1.reg, tmp2.reg); + build.vaddsd(inst.regX64, inst.regX64, tmp1.reg); + build.vroundsd(inst.regX64, inst.regX64, inst.regX64, RoundingModeX64::RoundToZero); + break; + } + case IrCmd::SQRT_NUM: + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + build.vsqrtsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a)); + break; + case IrCmd::ABS_NUM: + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + if (inst.a.kind != IrOpKind::Inst || regOp(inst.a) != inst.regX64) + build.vmovsd(inst.regX64, memRegDoubleOp(inst.a)); + + build.vandpd(inst.regX64, inst.regX64, build.i64(~(1LL << 63))); + break; case IrCmd::NOT_ANY: { // TODO: if we have a single user which is a STORE_INT, we are missing the opportunity to write directly to target @@ -539,7 +509,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::JUMP_CMP_ANY: - jumpOnAnyCmpFallback(build, vmRegOp(inst.a), vmRegOp(inst.b), conditionOp(inst.c), labelOp(inst.d)); + jumpOnAnyCmpFallback(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), conditionOp(inst.c), labelOp(inst.d)); jumpOrFallthrough(blockOp(inst.e), next); break; case IrCmd::JUMP_SLOT_MATCH: @@ -551,34 +521,34 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t 
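
The ROUND_NUM lowering above implements round-half-away-from-zero without a dedicated rounding mode: vandpd extracts the sign bit, vorpd injects it into the adjustment constant, and the truncating vroundsd finishes the job. The constant is 0.49999999999999994, the largest double below 0.5, rather than 0.5 itself, so inputs just under one half are not pushed over by the intermediate addition. A scalar model:

#include <cassert>
#include <cmath>

// Scalar model of the ROUND_NUM sequence (round half away from zero);
// copysign plays the role of the vandpd/vorpd sign injection.
double roundModel(double x)
{
    double adj = std::copysign(0.49999999999999994, x);
    return std::trunc(x + adj);
}

int main()
{
    assert(roundModel(0.5) == 1.0);
    assert(roundModel(-0.5) == -1.0);
    assert(roundModel(2.5) == 3.0);
    assert(roundModel(0.49999999999999994) == 0.0); // a plain +0.5 would give 1.0
}
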
index, IrBlock& next) break; } case IrCmd::TABLE_LEN: - inst.regX64 = regs.allocXmmReg(); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]); - build.mov(rArg1, regOp(inst.a)); - build.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]); + inst.regX64 = regs.allocXmmReg(); build.vcvtsi2sd(inst.regX64, inst.regX64, eax); break; + } case IrCmd::NEW_TABLE: - inst.regX64 = regs.allocGprReg(SizeX64::qword); - - build.mov(rArg1, rState); - build.mov(dwordReg(rArg2), uintOp(inst.a)); - build.mov(dwordReg(rArg3), uintOp(inst.b)); - build.call(qword[rNativeContext + offsetof(NativeContext, luaH_new)]); - - if (inst.regX64 != rax) - build.mov(inst.regX64, rax); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.a)), inst.a); + callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b)), inst.b); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_new)]); + inst.regX64 = regs.takeReg(rax); break; + } case IrCmd::DUP_TABLE: - inst.regX64 = regs.allocGprReg(SizeX64::qword); - - // Re-ordered to avoid register conflict - build.mov(rArg2, regOp(inst.a)); - build.mov(rArg1, rState); - build.call(qword[rNativeContext + offsetof(NativeContext, luaH_clone)]); - - if (inst.regX64 != rax) - build.mov(inst.regX64, rax); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_clone)]); + inst.regX64 = regs.takeReg(rax); break; + } case IrCmd::TRY_NUM_TO_INDEX: { inst.regX64 = regs.allocGprReg(SizeX64::dword); @@ -590,12 +560,26 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::TRY_CALL_FASTGETTM: { - inst.regX64 = regs.allocGprReg(SizeX64::qword); + ScopedRegX64 tmp{regs, SizeX64::qword}; - callGetFastTmOrFallback(build, regOp(inst.a), TMS(intOp(inst.b)), labelOp(inst.c)); + build.mov(tmp.reg, qword[regOp(inst.a) + offsetof(Table, metatable)]); + regs.freeLastUseReg(function.instOp(inst.a), index); // Release before the call if it's the last use - if (inst.regX64 != rax) - build.mov(inst.regX64, rax); + build.test(tmp.reg, tmp.reg); + build.jcc(ConditionX64::Zero, labelOp(inst.c)); // No metatable + + build.test(byte[tmp.reg + offsetof(Table, tmcache)], 1 << intOp(inst.b)); + build.jcc(ConditionX64::NotZero, labelOp(inst.c)); // No tag method + + ScopedRegX64 tmp2{regs, SizeX64::qword}; + build.mov(tmp2.reg, qword[rState + offsetof(lua_State, global)]); + + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.addArgument(SizeX64::qword, intOp(inst.b)); + callWrap.addArgument(SizeX64::qword, qword[tmp2.release() + offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*)]); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_gettm)]); + inst.regX64 = regs.takeReg(rax); break; } case IrCmd::INT_TO_NUM: @@ -701,7 +685,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.call(rax); - inst.regX64 = regs.takeGprReg(eax); // Result of a builtin call is returned in eax + inst.regX64 = regs.takeReg(eax); // Result of a builtin call is returned in eax break; } case IrCmd::CHECK_FASTCALL_RES: @@ -714,23 +698,23 @@ void 
IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::DO_ARITH: if (inst.c.kind == IrOpKind::VmReg) - callArithHelper(build, vmRegOp(inst.a), vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), TMS(intOp(inst.d))); + callArithHelper(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), TMS(intOp(inst.d))); else - callArithHelper(build, vmRegOp(inst.a), vmRegOp(inst.b), luauConstantAddress(vmConstOp(inst.c)), TMS(intOp(inst.d))); + callArithHelper(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), luauConstantAddress(vmConstOp(inst.c)), TMS(intOp(inst.d))); break; case IrCmd::DO_LEN: - callLengthHelper(build, vmRegOp(inst.a), vmRegOp(inst.b)); + callLengthHelper(regs, build, vmRegOp(inst.a), vmRegOp(inst.b)); break; case IrCmd::GET_TABLE: if (inst.c.kind == IrOpKind::VmReg) { - callGetTable(build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a)); + callGetTable(regs, build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a)); } else if (inst.c.kind == IrOpKind::Constant) { TValue n; setnvalue(&n, uintOp(inst.c)); - callGetTable(build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a)); + callGetTable(regs, build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a)); } else { @@ -740,13 +724,13 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) case IrCmd::SET_TABLE: if (inst.c.kind == IrOpKind::VmReg) { - callSetTable(build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a)); + callSetTable(regs, build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a)); } else if (inst.c.kind == IrOpKind::Constant) { TValue n; setnvalue(&n, uintOp(inst.c)); - callSetTable(build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a)); + callSetTable(regs, build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a)); } else { @@ -757,13 +741,16 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) emitInstGetImportFallback(build, vmRegOp(inst.a), uintOp(inst.b)); break; case IrCmd::CONCAT: - build.mov(rArg1, rState); - build.mov(dwordReg(rArg2), uintOp(inst.b)); - build.mov(dwordReg(rArg3), vmRegOp(inst.a) + uintOp(inst.b) - 1); - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_concat)]); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b))); + callWrap.addArgument(SizeX64::dword, int32_t(vmRegOp(inst.a) + uintOp(inst.b) - 1)); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_concat)]); emitUpdateBase(build); break; + } case IrCmd::GET_UPVALUE: { ScopedRegX64 tmp1{regs, SizeX64::qword}; @@ -793,21 +780,26 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) Label next; ScopedRegX64 tmp1{regs, SizeX64::qword}; ScopedRegX64 tmp2{regs, SizeX64::qword}; - ScopedRegX64 tmp3{regs, SizeX64::xmmword}; build.mov(tmp1.reg, sClosure); build.mov(tmp2.reg, qword[tmp1.reg + offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc)]); build.mov(tmp1.reg, qword[tmp2.reg + offsetof(UpVal, v)]); - build.vmovups(tmp3.reg, luauReg(vmRegOp(inst.b))); - build.vmovups(xmmword[tmp1.reg], tmp3.reg); - callBarrierObject(build, tmp1.reg, tmp2.reg, vmRegOp(inst.b), next); + { + ScopedRegX64 tmp3{regs, SizeX64::xmmword}; + build.vmovups(tmp3.reg, luauReg(vmRegOp(inst.b))); + build.vmovups(xmmword[tmp1.reg], tmp3.reg); + } + + 
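
The callBarrierObject call just below (and checkObjectBarrierConditions above) only reach luaC_barrier* when the store actually threatens the collector's invariant: a black, already-scanned object must never point at a white, not-yet-marked value. The guard is roughly this predicate (simplified; assumes the standard tri-color scheme of Lua's incremental GC, with illustrative field names):

#include <cassert>

// Simplified form of the write-barrier precondition checked before calling
// into the GC.
struct GCObject { bool black, white; };

bool needsBarrier(const GCObject& obj, const GCObject* valueGc)
{
    // Only collectable values can break the invariant, and only when a black
    // object ends up referencing a white one.
    return valueGc != nullptr && obj.black && valueGc->white;
}

int main()
{
    GCObject black{true, false}, white{false, true};
    assert(needsBarrier(black, &white));   // invariant violated: barrier runs
    assert(!needsBarrier(white, &white));  // writer not scanned yet: skip
    assert(!needsBarrier(black, nullptr)); // non-collectable value: skip
}
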
tmp1.free(); + + callBarrierObject(regs, build, tmp2.release(), {}, vmRegOp(inst.b), next); build.setLabel(next); break; } case IrCmd::PREPARE_FORN: - callPrepareForN(build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c)); + callPrepareForN(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c)); break; case IrCmd::CHECK_TAG: if (inst.a.kind == IrOpKind::Inst) @@ -863,38 +855,43 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) jumpIfNodeHasNext(build, regOp(inst.a), labelOp(inst.b)); break; case IrCmd::INTERRUPT: + regs.assertAllFree(); emitInterrupt(build, uintOp(inst.a)); break; case IrCmd::CHECK_GC: { Label skip; - callCheckGc(build, -1, false, skip); + callCheckGc(regs, build, skip); build.setLabel(skip); break; } case IrCmd::BARRIER_OBJ: { Label skip; - ScopedRegX64 tmp{regs, SizeX64::qword}; - - callBarrierObject(build, tmp.reg, regOp(inst.a), vmRegOp(inst.b), skip); + callBarrierObject(regs, build, regOp(inst.a), inst.a, vmRegOp(inst.b), skip); build.setLabel(skip); break; } case IrCmd::BARRIER_TABLE_BACK: { Label skip; - - callBarrierTableFast(build, regOp(inst.a), skip); + callBarrierTableFast(regs, build, regOp(inst.a), inst.a, skip); build.setLabel(skip); break; } case IrCmd::BARRIER_TABLE_FORWARD: { Label skip; - ScopedRegX64 tmp{regs, SizeX64::qword}; - callBarrierTable(build, tmp.reg, regOp(inst.a), vmRegOp(inst.b), skip); + ScopedRegX64 tmp{regs, SizeX64::qword}; + checkObjectBarrierConditions(build, tmp.reg, regOp(inst.a), vmRegOp(inst.b), skip); + + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a); + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barriertable)]); + build.setLabel(skip); break; } @@ -926,11 +923,12 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(UpVal, v)]); build.jcc(ConditionX64::Above, next); - if (rArg2 != tmp2.reg) - build.mov(rArg2, tmp2.reg); + tmp1.free(); - build.mov(rArg1, rState); - build.call(qword[rNativeContext + offsetof(NativeContext, luaF_close)]); + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, tmp2); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaF_close)]); build.setLabel(next); break; @@ -940,42 +938,53 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; // Fallbacks to non-IR instruction implementations - case IrCmd::LOP_SETLIST: + case IrCmd::SETLIST: { Label next; - emitInstSetList(build, next, vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d), uintOp(inst.e)); + regs.assertAllFree(); + emitInstSetList(regs, build, next, vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d), uintOp(inst.e)); build.setLabel(next); break; } - case IrCmd::LOP_CALL: + case IrCmd::CALL: + regs.assertAllFree(); emitInstCall(build, helpers, vmRegOp(inst.a), intOp(inst.b), intOp(inst.c)); break; - case IrCmd::LOP_RETURN: + case IrCmd::RETURN: + regs.assertAllFree(); emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b)); break; - case IrCmd::LOP_FORGLOOP: + case IrCmd::FORGLOOP: + regs.assertAllFree(); emitinstForGLoop(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c), labelOp(inst.d)); break; - case IrCmd::LOP_FORGLOOP_FALLBACK: - emitinstForGLoopFallback(build, uintOp(inst.a), vmRegOp(inst.b), intOp(inst.c), labelOp(inst.d)); - 
build.jmp(labelOp(inst.e)); + case IrCmd::FORGLOOP_FALLBACK: + regs.assertAllFree(); + emitinstForGLoopFallback(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c)); + build.jmp(labelOp(inst.d)); break; - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: + case IrCmd::FORGPREP_XNEXT_FALLBACK: + regs.assertAllFree(); emitInstForGPrepXnextFallback(build, uintOp(inst.a), vmRegOp(inst.b), labelOp(inst.c)); break; - case IrCmd::LOP_AND: + case IrCmd::AND: + regs.assertAllFree(); emitInstAnd(build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c)); break; - case IrCmd::LOP_ANDK: + case IrCmd::ANDK: + regs.assertAllFree(); emitInstAndK(build, vmRegOp(inst.a), vmRegOp(inst.b), vmConstOp(inst.c)); break; - case IrCmd::LOP_OR: + case IrCmd::OR: + regs.assertAllFree(); emitInstOr(build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c)); break; - case IrCmd::LOP_ORK: + case IrCmd::ORK: + regs.assertAllFree(); emitInstOrK(build, vmRegOp(inst.a), vmRegOp(inst.b), vmConstOp(inst.c)); break; - case IrCmd::LOP_COVERAGE: + case IrCmd::COVERAGE: + regs.assertAllFree(); emitInstCoverage(build, uintOp(inst.a)); break; @@ -984,12 +993,14 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_GETGLOBAL, uintOp(inst.a)); break; case IrCmd::FALLBACK_SETGLOBAL: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_SETGLOBAL, uintOp(inst.a)); break; case IrCmd::FALLBACK_GETTABLEKS: @@ -997,6 +1008,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_GETTABLEKS, uintOp(inst.a)); break; case IrCmd::FALLBACK_SETTABLEKS: @@ -1004,6 +1016,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_SETTABLEKS, uintOp(inst.a)); break; case IrCmd::FALLBACK_NAMECALL: @@ -1011,32 +1024,38 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_NAMECALL, uintOp(inst.a)); break; case IrCmd::FALLBACK_PREPVARARGS: LUAU_ASSERT(inst.b.kind == IrOpKind::Constant); + regs.assertAllFree(); emitFallback(build, data, LOP_PREPVARARGS, uintOp(inst.a)); break; case IrCmd::FALLBACK_GETVARARGS: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::Constant); + regs.assertAllFree(); emitFallback(build, data, LOP_GETVARARGS, uintOp(inst.a)); break; case IrCmd::FALLBACK_NEWCLOSURE: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::Constant); + regs.assertAllFree(); emitFallback(build, data, LOP_NEWCLOSURE, uintOp(inst.a)); break; case IrCmd::FALLBACK_DUPCLOSURE: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_DUPCLOSURE, uintOp(inst.a)); break; case IrCmd::FALLBACK_FORGPREP: + regs.assertAllFree(); emitFallback(build, data, LOP_FORGPREP, uintOp(inst.a)); break; default: diff --git a/CodeGen/src/IrLoweringX64.h b/CodeGen/src/IrLoweringX64.h 
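
A pattern running through the hunk above: every instruction still emitted by a pre-IR helper (emitInstCall, emitFallback, and friends) is now preceded by regs.assertAllFree(). Those helpers address x64 registers directly, so anything the allocator still considered live would be silently clobbered; the assert turns that into an immediate failure instead. A toy model of the discipline (illustration only, not the real allocator):

#include <bitset>
#include <cassert>

// Toy allocator showing the contract behind regs.assertAllFree(): nothing may
// be live across a legacy emitter that hand-picks its own registers.
struct ToyRegAlloc
{
    std::bitset<16> inUse;

    int take(int r)  { assert(!inUse[r]); inUse.set(r); return r; }
    void free(int r) { assert(inUse[r]); inUse.reset(r); }
    void assertAllFree() const { assert(inUse.none()); }
};

int main()
{
    ToyRegAlloc regs;
    int r = regs.take(3);
    regs.free(r);         // forgetting this would fire the assert below
    regs.assertAllFree(); // now safe to emit a register-clobbering fallback
}
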
index c8ebd1f1..ecaa6a1d 100644 --- a/CodeGen/src/IrLoweringX64.h +++ b/CodeGen/src/IrLoweringX64.h @@ -3,8 +3,7 @@ #include "Luau/AssemblyBuilderX64.h" #include "Luau/IrData.h" - -#include "IrRegAllocX64.h" +#include "Luau/IrRegAllocX64.h" #include diff --git a/CodeGen/src/IrRegAllocA64.cpp b/CodeGen/src/IrRegAllocA64.cpp new file mode 100644 index 00000000..dc18ab56 --- /dev/null +++ b/CodeGen/src/IrRegAllocA64.cpp @@ -0,0 +1,174 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#include "IrRegAllocA64.h" + +#ifdef _MSC_VER +#include +#endif + +namespace Luau +{ +namespace CodeGen +{ +namespace A64 +{ + +inline int setBit(uint32_t n) +{ + LUAU_ASSERT(n); + +#ifdef _MSC_VER + unsigned long rl; + _BitScanReverse(&rl, n); + return int(rl); +#else + return 31 - __builtin_clz(n); +#endif +} + +IrRegAllocA64::IrRegAllocA64(IrFunction& function, std::initializer_list> regs) + : function(function) +{ + for (auto& p : regs) + { + LUAU_ASSERT(p.first.kind == p.second.kind && p.first.index <= p.second.index); + + Set& set = getSet(p.first.kind); + + for (int i = p.first.index; i <= p.second.index; ++i) + set.base |= 1u << i; + } + + gpr.free = gpr.base; + simd.free = simd.base; +} + +RegisterA64 IrRegAllocA64::allocReg(KindA64 kind) +{ + Set& set = getSet(kind); + + if (set.free == 0) + { + LUAU_ASSERT(!"Out of registers to allocate"); + return noreg; + } + + int index = setBit(set.free); + set.free &= ~(1u << index); + + return RegisterA64{kind, uint8_t(index)}; +} + +RegisterA64 IrRegAllocA64::allocTemp(KindA64 kind) +{ + Set& set = getSet(kind); + + if (set.free == 0) + { + LUAU_ASSERT(!"Out of registers to allocate"); + return noreg; + } + + int index = setBit(set.free); + + set.free &= ~(1u << index); + set.temp |= 1u << index; + + return RegisterA64{kind, uint8_t(index)}; +} + +RegisterA64 IrRegAllocA64::allocReuse(KindA64 kind, uint32_t index, std::initializer_list oprefs) +{ + for (IrOp op : oprefs) + { + if (op.kind != IrOpKind::Inst) + continue; + + IrInst& source = function.instructions[op.index]; + + if (source.lastUse == index && !source.reusedReg) + { + LUAU_ASSERT(source.regA64.kind == kind); + + source.reusedReg = true; + return source.regA64; + } + } + + return allocReg(kind); +} + +void IrRegAllocA64::freeReg(RegisterA64 reg) +{ + Set& set = getSet(reg.kind); + + LUAU_ASSERT((set.base & (1u << reg.index)) != 0); + LUAU_ASSERT((set.free & (1u << reg.index)) == 0); + set.free |= 1u << reg.index; +} + +void IrRegAllocA64::freeLastUseReg(IrInst& target, uint32_t index) +{ + if (target.lastUse == index && !target.reusedReg) + { + // Register might have already been freed if it had multiple uses inside a single instruction + if (target.regA64 == noreg) + return; + + freeReg(target.regA64); + target.regA64 = noreg; + } +} + +void IrRegAllocA64::freeLastUseRegs(const IrInst& inst, uint32_t index) +{ + auto checkOp = [this, index](IrOp op) { + if (op.kind == IrOpKind::Inst) + freeLastUseReg(function.instructions[op.index], index); + }; + + checkOp(inst.a); + checkOp(inst.b); + checkOp(inst.c); + checkOp(inst.d); + checkOp(inst.e); + checkOp(inst.f); +} + +void IrRegAllocA64::freeTempRegs() +{ + LUAU_ASSERT((gpr.free & gpr.temp) == 0); + gpr.free |= gpr.temp; + gpr.temp = 0; + + LUAU_ASSERT((simd.free & simd.temp) == 0); + simd.free |= simd.temp; + simd.temp = 0; +} + +void IrRegAllocA64::assertAllFree() const +{ + LUAU_ASSERT(gpr.free == gpr.base); + LUAU_ASSERT(simd.free == simd.base); +} + +IrRegAllocA64::Set& 
IrRegAllocA64::getSet(KindA64 kind) +{ + switch (kind) + { + case KindA64::x: + case KindA64::w: + return gpr; + + case KindA64::d: + case KindA64::q: + return simd; + + default: + LUAU_ASSERT(!"Unexpected register kind"); + LUAU_UNREACHABLE(); + } +} + +} // namespace A64 +} // namespace CodeGen +} // namespace Luau diff --git a/CodeGen/src/IrRegAllocA64.h b/CodeGen/src/IrRegAllocA64.h new file mode 100644 index 00000000..2ed0787a --- /dev/null +++ b/CodeGen/src/IrRegAllocA64.h @@ -0,0 +1,55 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#pragma once + +#include "Luau/IrData.h" +#include "Luau/RegisterA64.h" + +#include <initializer_list> +#include <utility> + +namespace Luau +{ +namespace CodeGen +{ +namespace A64 +{ + +struct IrRegAllocA64 +{ + IrRegAllocA64(IrFunction& function, std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs); + + RegisterA64 allocReg(KindA64 kind); + RegisterA64 allocTemp(KindA64 kind); + RegisterA64 allocReuse(KindA64 kind, uint32_t index, std::initializer_list<IrOp> oprefs); + + void freeReg(RegisterA64 reg); + + void freeLastUseReg(IrInst& target, uint32_t index); + void freeLastUseRegs(const IrInst& inst, uint32_t index); + + void freeTempRegs(); + + void assertAllFree() const; + + IrFunction& function; + + struct Set + { + // which registers are in the set that the allocator manages (initialized at construction) + uint32_t base = 0; + + // which subset of initial set is free + uint32_t free = 0; + + // which subset of initial set is allocated as temporary + uint32_t temp = 0; + }; + + Set gpr, simd; + + Set& getSet(KindA64 kind); +}; + +} // namespace A64 +} // namespace CodeGen +} // namespace Luau diff --git a/CodeGen/src/IrRegAllocX64.cpp b/CodeGen/src/IrRegAllocX64.cpp index c527d033..eeb6cfe6 100644 --- a/CodeGen/src/IrRegAllocX64.cpp +++ b/CodeGen/src/IrRegAllocX64.cpp @@ -1,19 +1,5 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details -#include "IrRegAllocX64.h" - -#include "Luau/CodeGen.h" -#include "Luau/DenseHash.h" -#include "Luau/IrAnalysis.h" -#include "Luau/IrDump.h" -#include "Luau/IrUtils.h" - -#include "EmitCommonX64.h" -#include "EmitInstructionX64.h" -#include "NativeState.h" - -#include "lstate.h" - -#include +#include "Luau/IrRegAllocX64.h" namespace Luau { @@ -108,13 +94,21 @@ RegisterX64 IrRegAllocX64::allocXmmRegOrReuse(uint32_t index, std::initializer_l return allocXmmReg(); } -RegisterX64 IrRegAllocX64::takeGprReg(RegisterX64 reg) +RegisterX64 IrRegAllocX64::takeReg(RegisterX64 reg) { // In a more advanced register allocator, this would require a spill for the current register user // But at the current stage we don't have register live ranges intersecting forced register uses - LUAU_ASSERT(freeGprMap[reg.index]); + if (reg.size == SizeX64::xmmword) + { + LUAU_ASSERT(freeXmmMap[reg.index]); + freeXmmMap[reg.index] = false; + } + else + { + LUAU_ASSERT(freeGprMap[reg.index]); + freeGprMap[reg.index] = false; + } - freeGprMap[reg.index] = false; return reg; } @@ -134,7 +128,7 @@ void IrRegAllocX64::freeReg(RegisterX64 reg) void IrRegAllocX64::freeLastUseReg(IrInst& target, uint32_t index) { - if (target.lastUse == index && !target.reusedReg) + if (isLastUseReg(target, index)) { // Register might have already been freed if it had multiple uses inside a single instruction if (target.regX64 == noreg) @@ -160,6 +154,35 @@ void IrRegAllocX64::freeLastUseRegs(const IrInst& inst, uint32_t index) checkOp(inst.f); } +bool IrRegAllocX64::isLastUseReg(const IrInst&
target, uint32_t index) const +{ + return target.lastUse == index && !target.reusedReg; +} + +bool IrRegAllocX64::shouldFreeGpr(RegisterX64 reg) const +{ + if (reg == noreg) + return false; + + LUAU_ASSERT(reg.size != SizeX64::xmmword); + + for (RegisterX64 gpr : kGprAllocOrder) + { + if (reg.index == gpr.index) + return true; + } + + return false; +} + +void IrRegAllocX64::assertFree(RegisterX64 reg) const +{ + if (reg.size == SizeX64::xmmword) + LUAU_ASSERT(freeXmmMap[reg.index]); + else + LUAU_ASSERT(freeGprMap[reg.index]); +} + void IrRegAllocX64::assertAllFree() const { for (RegisterX64 reg : kGprAllocOrder) @@ -211,6 +234,13 @@ void ScopedRegX64::free() reg = noreg; } +RegisterX64 ScopedRegX64::release() +{ + RegisterX64 tmp = reg; + reg = noreg; + return tmp; +} + } // namespace X64 } // namespace CodeGen } // namespace Luau diff --git a/CodeGen/src/IrTranslateBuiltins.cpp b/CodeGen/src/IrTranslateBuiltins.cpp index cb8e4148..2955aaff 100644 --- a/CodeGen/src/IrTranslateBuiltins.cpp +++ b/CodeGen/src/IrTranslateBuiltins.cpp @@ -6,7 +6,6 @@ #include "lstate.h" -// TODO: should be possible to handle fastcalls in contexts where nresults is -1 by adding the adjustment instruction // TODO: when nresults is less than our actual result count, we can skip computing/writing unused results namespace Luau @@ -26,8 +25,8 @@ BuiltinImplResult translateBuiltinNumberToNumber( build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 1}; } @@ -43,8 +42,8 @@ BuiltinImplResult translateBuiltin2NumberToNumber( build.loadAndCheckTag(args, LUA_TNUMBER, fallback); build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO:tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 1}; } @@ -59,8 +58,9 @@ BuiltinImplResult translateBuiltinNumberTo2Number( build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: some tag updates might not be required, we place them here now because FASTCALL is not modeled in constant propagation yet - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + build.inst(IrCmd::STORE_TAG, build.vmReg(ra + 1), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 2}; @@ -131,8 +131,8 @@ BuiltinImplResult translateBuiltinMathLog( build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: tag update might not be required, we place it here now because FASTCALL is 
not modeled in constant propagation yet - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 1}; } @@ -210,6 +210,44 @@ BuiltinImplResult translateBuiltinMathClamp(IrBuilder& build, int nparams, int r return {BuiltinImplType::UsesFallback, 1}; } +BuiltinImplResult translateBuiltinMathUnary(IrBuilder& build, IrCmd cmd, int nparams, int ra, int arg, int nresults, IrOp fallback) +{ + if (nparams < 1 || nresults > 1) + return {BuiltinImplType::None, -1}; + + build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); + + IrOp varg = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(arg)); + IrOp result = build.inst(cmd, varg); + + build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result); + + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + + return {BuiltinImplType::UsesFallback, 1}; +} + +BuiltinImplResult translateBuiltinMathBinary(IrBuilder& build, IrCmd cmd, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback) +{ + if (nparams < 2 || nresults > 1) + return {BuiltinImplType::None, -1}; + + build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); + build.loadAndCheckTag(args, LUA_TNUMBER, fallback); + + IrOp lhs = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(arg)); + IrOp rhs = build.inst(IrCmd::LOAD_DOUBLE, args); + IrOp result = build.inst(cmd, lhs, rhs); + + build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result); + + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + + return {BuiltinImplType::UsesFallback, 1}; +} + BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback) { if (nparams < 1 || nresults > 1) @@ -218,7 +256,6 @@ BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, in build.inst( IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING)); return {BuiltinImplType::UsesFallback, 1}; @@ -232,7 +269,6 @@ BuiltinImplResult translateBuiltinTypeof(IrBuilder& build, int nparams, int ra, build.inst( IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING)); return {BuiltinImplType::UsesFallback, 1}; @@ -261,9 +297,17 @@ BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, case LBF_MATH_CLAMP: return translateBuiltinMathClamp(build, nparams, ra, arg, args, nresults, fallback); case LBF_MATH_FLOOR: + return translateBuiltinMathUnary(build, IrCmd::FLOOR_NUM, nparams, ra, arg, nresults, fallback); case LBF_MATH_CEIL: + return translateBuiltinMathUnary(build, IrCmd::CEIL_NUM, nparams, ra, arg, nresults, fallback); case LBF_MATH_SQRT: + return translateBuiltinMathUnary(build, IrCmd::SQRT_NUM, nparams, ra, arg, nresults, fallback); case LBF_MATH_ABS: + return translateBuiltinMathUnary(build, IrCmd::ABS_NUM, nparams, ra, arg, nresults, fallback); + case 
LBF_MATH_ROUND: + return translateBuiltinMathUnary(build, IrCmd::ROUND_NUM, nparams, ra, arg, nresults, fallback); + case LBF_MATH_POW: + return translateBuiltinMathBinary(build, IrCmd::POW_NUM, nparams, ra, arg, args, nresults, fallback); case LBF_MATH_EXP: case LBF_MATH_ASIN: case LBF_MATH_SIN: @@ -275,11 +319,9 @@ BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, case LBF_MATH_TAN: case LBF_MATH_TANH: case LBF_MATH_LOG10: - case LBF_MATH_ROUND: case LBF_MATH_SIGN: return translateBuiltinNumberToNumber(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback); case LBF_MATH_FMOD: - case LBF_MATH_POW: case LBF_MATH_ATAN2: case LBF_MATH_LDEXP: return translateBuiltin2NumberToNumber(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback); diff --git a/CodeGen/src/IrTranslation.cpp b/CodeGen/src/IrTranslation.cpp index d90841ce..e366888e 100644 --- a/CodeGen/src/IrTranslation.cpp +++ b/CodeGen/src/IrTranslation.cpp @@ -296,46 +296,60 @@ static void translateInstBinaryNumeric(IrBuilder& build, int ra, int rb, int rc, IrOp vb = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(rb)); IrOp vc; + IrOp result; + if (opc.kind == IrOpKind::VmConst) { LUAU_ASSERT(build.function.proto); TValue protok = build.function.proto->k[opc.index]; LUAU_ASSERT(protok.tt == LUA_TNUMBER); - vc = build.constDouble(protok.value.n); + + // VM has special cases for exponentiation with constants + if (tm == TM_POW && protok.value.n == 0.5) + result = build.inst(IrCmd::SQRT_NUM, vb); + else if (tm == TM_POW && protok.value.n == 2.0) + result = build.inst(IrCmd::MUL_NUM, vb, vb); + else if (tm == TM_POW && protok.value.n == 3.0) + result = build.inst(IrCmd::MUL_NUM, vb, build.inst(IrCmd::MUL_NUM, vb, vb)); + else + vc = build.constDouble(protok.value.n); } else { vc = build.inst(IrCmd::LOAD_DOUBLE, opc); } - IrOp va; - - switch (tm) + if (result.kind == IrOpKind::None) { - case TM_ADD: - va = build.inst(IrCmd::ADD_NUM, vb, vc); - break; - case TM_SUB: - va = build.inst(IrCmd::SUB_NUM, vb, vc); - break; - case TM_MUL: - va = build.inst(IrCmd::MUL_NUM, vb, vc); - break; - case TM_DIV: - va = build.inst(IrCmd::DIV_NUM, vb, vc); - break; - case TM_MOD: - va = build.inst(IrCmd::MOD_NUM, vb, vc); - break; - case TM_POW: - va = build.inst(IrCmd::POW_NUM, vb, vc); - break; - default: - LUAU_ASSERT(!"unsupported binary op"); + LUAU_ASSERT(vc.kind != IrOpKind::None); + + switch (tm) + { + case TM_ADD: + result = build.inst(IrCmd::ADD_NUM, vb, vc); + break; + case TM_SUB: + result = build.inst(IrCmd::SUB_NUM, vb, vc); + break; + case TM_MUL: + result = build.inst(IrCmd::MUL_NUM, vb, vc); + break; + case TM_DIV: + result = build.inst(IrCmd::DIV_NUM, vb, vc); + break; + case TM_MOD: + result = build.inst(IrCmd::MOD_NUM, vb, vc); + break; + case TM_POW: + result = build.inst(IrCmd::POW_NUM, vb, vc); + break; + default: + LUAU_ASSERT(!"unsupported binary op"); + } } - build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), va); + build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result); if (ra != rb && ra != rc) // TODO: optimization should handle second check, but we'll test this later build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); @@ -638,7 +652,7 @@ void translateInstForGPrepNext(IrBuilder& build, const Instruction* pc, int pcpo build.inst(IrCmd::JUMP, target); build.beginBlock(fallback); - build.inst(IrCmd::LOP_FORGPREP_XNEXT_FALLBACK, build.constUint(pcpos), build.vmReg(ra), target); + build.inst(IrCmd::FORGPREP_XNEXT_FALLBACK, 
build.constUint(pcpos), build.vmReg(ra), target); } void translateInstForGPrepInext(IrBuilder& build, const Instruction* pc, int pcpos) @@ -670,7 +684,7 @@ void translateInstForGPrepInext(IrBuilder& build, const Instruction* pc, int pcp build.inst(IrCmd::JUMP, target); build.beginBlock(fallback); - build.inst(IrCmd::LOP_FORGPREP_XNEXT_FALLBACK, build.constUint(pcpos), build.vmReg(ra), target); + build.inst(IrCmd::FORGPREP_XNEXT_FALLBACK, build.constUint(pcpos), build.vmReg(ra), target); } void translateInstForGLoopIpairs(IrBuilder& build, const Instruction* pc, int pcpos) @@ -721,7 +735,8 @@ void translateInstForGLoopIpairs(IrBuilder& build, const Instruction* pc, int pc build.inst(IrCmd::JUMP, loopRepeat); build.beginBlock(fallback); - build.inst(IrCmd::LOP_FORGLOOP_FALLBACK, build.constUint(pcpos), build.vmReg(ra), build.constInt(int(pc[1])), loopRepeat, loopExit); + build.inst(IrCmd::SET_SAVEDPC, build.constUint(pcpos + 1)); + build.inst(IrCmd::FORGLOOP_FALLBACK, build.vmReg(ra), build.constInt(int(pc[1])), loopRepeat, loopExit); // Fallthrough in original bytecode is implicit, so we start next internal block here if (build.isInternalBlock(loopExit)) diff --git a/CodeGen/src/IrUtils.cpp b/CodeGen/src/IrUtils.cpp index b28ce596..45e2bae0 100644 --- a/CodeGen/src/IrUtils.cpp +++ b/CodeGen/src/IrUtils.cpp @@ -320,6 +320,26 @@ void foldConstants(IrBuilder& build, IrFunction& function, IrBlock& block, uint3 if (inst.a.kind == IrOpKind::Constant) substitute(function, inst, build.constDouble(-function.doubleOp(inst.a))); break; + case IrCmd::FLOOR_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(floor(function.doubleOp(inst.a)))); + break; + case IrCmd::CEIL_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(ceil(function.doubleOp(inst.a)))); + break; + case IrCmd::ROUND_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(round(function.doubleOp(inst.a)))); + break; + case IrCmd::SQRT_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(sqrt(function.doubleOp(inst.a)))); + break; + case IrCmd::ABS_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(fabs(function.doubleOp(inst.a)))); + break; case IrCmd::NOT_ANY: if (inst.a.kind == IrOpKind::Constant) { diff --git a/CodeGen/src/NativeState.cpp b/CodeGen/src/NativeState.cpp index f1497890..ddc9c03d 100644 --- a/CodeGen/src/NativeState.cpp +++ b/CodeGen/src/NativeState.cpp @@ -109,6 +109,8 @@ void initHelperFunctions(NativeState& data) data.context.forgPrepXnextFallback = forgPrepXnextFallback; data.context.callProlog = callProlog; data.context.callEpilogC = callEpilogC; + + data.context.callFallback = callFallback; data.context.returnFallback = returnFallback; } diff --git a/CodeGen/src/NativeState.h b/CodeGen/src/NativeState.h index 6d833189..2d97e63c 100644 --- a/CodeGen/src/NativeState.h +++ b/CodeGen/src/NativeState.h @@ -101,7 +101,9 @@ struct NativeContext void (*forgPrepXnextFallback)(lua_State* L, TValue* ra, int pc) = nullptr; Closure* (*callProlog)(lua_State* L, TValue* ra, StkId argtop, int nresults) = nullptr; void (*callEpilogC)(lua_State* L, int nresults, int n) = nullptr; - const Instruction* (*returnFallback)(lua_State* L, StkId ra, int n) = nullptr; + + Closure* (*callFallback)(lua_State* L, StkId ra, StkId argtop, int nresults) = nullptr; + Closure* (*returnFallback)(lua_State* L, StkId ra, int n) = nullptr; // Opcode 
fallbacks, implemented in C NativeFallback fallback[LOP__COUNT] = {}; diff --git a/CodeGen/src/OptimizeConstProp.cpp b/CodeGen/src/OptimizeConstProp.cpp index 67236476..f767f549 100644 --- a/CodeGen/src/OptimizeConstProp.cpp +++ b/CodeGen/src/OptimizeConstProp.cpp @@ -503,10 +503,10 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& } } break; - case IrCmd::LOP_AND: - case IrCmd::LOP_ANDK: - case IrCmd::LOP_OR: - case IrCmd::LOP_ORK: + case IrCmd::AND: + case IrCmd::ANDK: + case IrCmd::OR: + case IrCmd::ORK: state.invalidate(inst.a); break; case IrCmd::FASTCALL: @@ -533,6 +533,11 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& case IrCmd::MIN_NUM: case IrCmd::MAX_NUM: case IrCmd::UNM_NUM: + case IrCmd::FLOOR_NUM: + case IrCmd::CEIL_NUM: + case IrCmd::ROUND_NUM: + case IrCmd::SQRT_NUM: + case IrCmd::ABS_NUM: case IrCmd::NOT_ANY: case IrCmd::JUMP: case IrCmd::JUMP_EQ_POINTER: @@ -547,10 +552,10 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& case IrCmd::CHECK_SLOT_MATCH: case IrCmd::CHECK_NODE_NO_NEXT: case IrCmd::BARRIER_TABLE_BACK: - case IrCmd::LOP_RETURN: - case IrCmd::LOP_COVERAGE: + case IrCmd::RETURN: + case IrCmd::COVERAGE: case IrCmd::SET_UPVALUE: - case IrCmd::LOP_SETLIST: // We don't track table state that this can invalidate + case IrCmd::SETLIST: // We don't track table state that this can invalidate case IrCmd::SET_SAVEDPC: // TODO: we may be able to remove some updates to PC case IrCmd::CLOSE_UPVALS: // Doesn't change memory that we track case IrCmd::CAPTURE: @@ -599,18 +604,18 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& case IrCmd::INTERRUPT: state.invalidateUserCall(); break; - case IrCmd::LOP_CALL: + case IrCmd::CALL: state.invalidateRegistersFrom(inst.a.index); state.invalidateUserCall(); break; - case IrCmd::LOP_FORGLOOP: + case IrCmd::FORGLOOP: state.invalidateRegistersFrom(inst.a.index + 2); // Rn and Rn+1 are not modified break; - case IrCmd::LOP_FORGLOOP_FALLBACK: - state.invalidateRegistersFrom(inst.b.index + 2); // Rn and Rn+1 are not modified + case IrCmd::FORGLOOP_FALLBACK: + state.invalidateRegistersFrom(inst.a.index + 2); // Rn and Rn+1 are not modified state.invalidateUserCall(); break; - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: + case IrCmd::FORGPREP_XNEXT_FALLBACK: // This fallback only conditionally throws an exception break; case IrCmd::FALLBACK_GETGLOBAL: diff --git a/Compiler/src/Compiler.cpp b/Compiler/src/Compiler.cpp index 03f4b3e6..9478404a 100644 --- a/Compiler/src/Compiler.cpp +++ b/Compiler/src/Compiler.cpp @@ -25,8 +25,6 @@ LUAU_FASTINTVARIABLE(LuauCompileInlineThreshold, 25) LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300) LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5) -LUAU_FASTFLAGVARIABLE(LuauCompileBuiltinArity, false) - namespace Luau { @@ -295,7 +293,7 @@ struct Compiler // handles builtin calls that can't be constant-folded but are known to return one value // note: optimizationLevel check is technically redundant but it's important that we never optimize based on builtins in O1 - if (FFlag::LuauCompileBuiltinArity && options.optimizationLevel >= 2) + if (options.optimizationLevel >= 2) if (int* bfid = builtins.find(expr)) return getBuiltinInfo(*bfid).results != 1; @@ -766,7 +764,7 @@ struct Compiler { if (!isExprMultRet(expr->args.data[expr->args.size - 1])) return compileExprFastcallN(expr, target, targetCount, targetTop, multRet, regs, bfid); - else if 
(FFlag::LuauCompileBuiltinArity && options.optimizationLevel >= 2 && int(expr->args.size) == getBuiltinInfo(bfid).params) + else if (options.optimizationLevel >= 2 && int(expr->args.size) == getBuiltinInfo(bfid).params) return compileExprFastcallN(expr, target, targetCount, targetTop, multRet, regs, bfid); } diff --git a/Sources.cmake b/Sources.cmake index 3f32aab8..3508ec39 100644 --- a/Sources.cmake +++ b/Sources.cmake @@ -65,8 +65,10 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/include/Luau/ConditionX64.h CodeGen/include/Luau/IrAnalysis.h CodeGen/include/Luau/IrBuilder.h + CodeGen/include/Luau/IrCallWrapperX64.h CodeGen/include/Luau/IrDump.h CodeGen/include/Luau/IrData.h + CodeGen/include/Luau/IrRegAllocX64.h CodeGen/include/Luau/IrUtils.h CodeGen/include/Luau/Label.h CodeGen/include/Luau/OperandX64.h @@ -94,9 +96,11 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/src/Fallbacks.cpp CodeGen/src/IrAnalysis.cpp CodeGen/src/IrBuilder.cpp + CodeGen/src/IrCallWrapperX64.cpp CodeGen/src/IrDump.cpp CodeGen/src/IrLoweringA64.cpp CodeGen/src/IrLoweringX64.cpp + CodeGen/src/IrRegAllocA64.cpp CodeGen/src/IrRegAllocX64.cpp CodeGen/src/IrTranslateBuiltins.cpp CodeGen/src/IrTranslation.cpp @@ -122,7 +126,7 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/src/FallbacksProlog.h CodeGen/src/IrLoweringA64.h CodeGen/src/IrLoweringX64.h - CodeGen/src/IrRegAllocX64.h + CodeGen/src/IrRegAllocA64.h CodeGen/src/IrTranslateBuiltins.h CodeGen/src/IrTranslation.h CodeGen/src/NativeState.h @@ -342,6 +346,7 @@ if(TARGET Luau.UnitTest) tests/Fixture.h tests/IostreamOptional.h tests/ScopedFlags.h + tests/AssemblyBuilderA64.test.cpp tests/AssemblyBuilderX64.test.cpp tests/AstJsonEncoder.test.cpp tests/AstQuery.test.cpp @@ -358,6 +363,7 @@ if(TARGET Luau.UnitTest) tests/Error.test.cpp tests/Frontend.test.cpp tests/IrBuilder.test.cpp + tests/IrCallWrapperX64.test.cpp tests/JsonEmitter.test.cpp tests/Lexer.test.cpp tests/Linter.test.cpp diff --git a/VM/src/lbuiltins.cpp b/VM/src/lbuiltins.cpp index 3c669bff..e0dc8a38 100644 --- a/VM/src/lbuiltins.cpp +++ b/VM/src/lbuiltins.cpp @@ -23,8 +23,6 @@ #endif #endif -LUAU_FASTFLAGVARIABLE(LuauBuiltinSSE41, false) - // luauF functions implement FASTCALL instruction that performs a direct execution of some builtin functions from the VM // The rule of thumb is that FASTCALL functions can not call user code, yield, fail, or reallocate stack. 
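[Editor's note: a minimal sketch of the luauF_* contract the surrounding comment describes; the builtin itself (luauF_double) and its operation are invented for illustration and are not part of this diff. The shape follows the rule of thumb stated above: check argument count, result count, and tags up front, write the result in place, and return the number of results, or return -1 to fall back.]

static int luauF_double(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams)
{
    // fast path: may not call user code, yield, fail, or reallocate the stack
    if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0))
    {
        setnvalue(res, 2.0 * nvalue(arg0)); // compute directly into the result slot
        return 1;                           // one result was produced
    }

    return -1; // type/arity mismatch: fall back to the usual call path
}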
// If types of the arguments mismatch, luauF_* needs to return -1 and the execution will fall back to the usual call path @@ -105,9 +103,7 @@ static int luauF_atan(lua_State* L, StkId res, TValue* arg0, int nresults, StkId return -1; } -// TODO: LUAU_NOINLINE can be removed with LuauBuiltinSSE41 LUAU_FASTMATH_BEGIN -LUAU_NOINLINE static int luauF_ceil(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) @@ -170,9 +166,7 @@ static int luauF_exp(lua_State* L, StkId res, TValue* arg0, int nresults, StkId return -1; } -// TODO: LUAU_NOINLINE can be removed with LuauBuiltinSSE41 LUAU_FASTMATH_BEGIN -LUAU_NOINLINE static int luauF_floor(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) @@ -949,9 +943,7 @@ static int luauF_sign(lua_State* L, StkId res, TValue* arg0, int nresults, StkId return -1; } -// TODO: LUAU_NOINLINE can be removed with LuauBuiltinSSE41 LUAU_FASTMATH_BEGIN -LUAU_NOINLINE static int luauF_round(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) @@ -1271,9 +1263,6 @@ LUAU_TARGET_SSE41 inline double roundsd_sse41(double v) LUAU_TARGET_SSE41 static int luauF_floor_sse41(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { - if (!FFlag::LuauBuiltinSSE41) - return luauF_floor(L, res, arg0, nresults, args, nparams); - if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) { double a1 = nvalue(arg0); @@ -1286,9 +1275,6 @@ LUAU_TARGET_SSE41 static int luauF_floor_sse41(lua_State* L, StkId res, TValue* LUAU_TARGET_SSE41 static int luauF_ceil_sse41(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { - if (!FFlag::LuauBuiltinSSE41) - return luauF_ceil(L, res, arg0, nresults, args, nparams); - if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) { double a1 = nvalue(arg0); @@ -1301,9 +1287,6 @@ LUAU_TARGET_SSE41 static int luauF_round_sse41(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { - if (!FFlag::LuauBuiltinSSE41) - return luauF_round(L, res, arg0, nresults, args, nparams); - if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) { double a1 = nvalue(arg0); diff --git a/fuzz/format.cpp b/fuzz/format.cpp index 3ad3912f..4b943bf1 100644 --- a/fuzz/format.cpp +++ b/fuzz/format.cpp @@ -1,6 +1,7 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details #include "Luau/Common.h" +#include #include #include diff --git a/fuzz/linter.cpp b/fuzz/linter.cpp index 66ca5bb1..854c6327 100644 --- a/fuzz/linter.cpp +++ b/fuzz/linter.cpp @@ -3,10 +3,10 @@ #include "Luau/BuiltinDefinitions.h" #include "Luau/Common.h" +#include "Luau/Frontend.h" #include "Luau/Linter.h" #include "Luau/ModuleResolver.h" #include "Luau/Parser.h" -#include "Luau/TypeInfer.h" extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) { @@ -18,18 +18,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) Luau::ParseResult parseResult = Luau::Parser::parse(reinterpret_cast<const char*>(Data), Size, names, allocator, options); // "static" here is to accelerate fuzzing process by only creating and populating the type environment once - static Luau::NullModuleResolver moduleResolver; - static Luau::InternalErrorReporter iceHandler;
- static Luau::TypeChecker sharedEnv(&moduleResolver, &iceHandler); - static int once = (Luau::registerBuiltinGlobals(sharedEnv), 1); + static Luau::NullFileResolver fileResolver; + static Luau::NullConfigResolver configResolver; + static Luau::Frontend frontend{&fileResolver, &configResolver}; + static int once = (Luau::registerBuiltinGlobals(frontend), 1); (void)once; - static int once2 = (Luau::freeze(sharedEnv.globalTypes), 1); + static int once2 = (Luau::freeze(frontend.globals.globalTypes), 1); (void)once2; if (parseResult.errors.empty()) { - Luau::TypeChecker typeck(&moduleResolver, &iceHandler); - typeck.globalScope = sharedEnv.globalScope; + Luau::TypeChecker typeck(frontend.globals.globalScope, &frontend.moduleResolver, frontend.builtinTypes, &frontend.iceHandler); Luau::LintOptions lintOptions; lintOptions.warningMask = ~0ull; diff --git a/fuzz/proto.cpp b/fuzz/proto.cpp index c94f0889..ffeb4919 100644 --- a/fuzz/proto.cpp +++ b/fuzz/proto.cpp @@ -261,8 +261,8 @@ DEFINE_PROTO_FUZZER(const luau::ModuleSet& message) { static FuzzFileResolver fileResolver; static FuzzConfigResolver configResolver; - static Luau::FrontendOptions options{true, true}; - static Luau::Frontend frontend(&fileResolver, &configResolver, options); + static Luau::FrontendOptions defaultOptions{/*retainFullTypeGraphs*/ true, /*forAutocomplete*/ false, /*runLintChecks*/ kFuzzLinter}; + static Luau::Frontend frontend(&fileResolver, &configResolver, defaultOptions); static int once = (setupFrontend(frontend), 0); (void)once; @@ -285,16 +285,12 @@ DEFINE_PROTO_FUZZER(const luau::ModuleSet& message) try { - Luau::CheckResult result = frontend.check(name, std::nullopt); - - // lint (note that we need access to types so we need to do this with typeck in scope) - if (kFuzzLinter && result.errors.empty()) - frontend.lint(name, std::nullopt); + frontend.check(name); // Second pass in strict mode (forced by auto-complete) - Luau::FrontendOptions opts; - opts.forAutocomplete = true; - frontend.check(name, opts); + Luau::FrontendOptions options = defaultOptions; + options.forAutocomplete = true; + frontend.check(name, options); } catch (std::exception&) { diff --git a/fuzz/typeck.cpp b/fuzz/typeck.cpp index a6c9ae28..4f8f8857 100644 --- a/fuzz/typeck.cpp +++ b/fuzz/typeck.cpp @@ -3,9 +3,9 @@ #include "Luau/BuiltinDefinitions.h" #include "Luau/Common.h" +#include "Luau/Frontend.h" #include "Luau/ModuleResolver.h" #include "Luau/Parser.h" -#include "Luau/TypeInfer.h" LUAU_FASTINT(LuauTypeInferRecursionLimit) LUAU_FASTINT(LuauTypeInferTypePackLoopLimit) @@ -23,23 +23,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) Luau::ParseResult parseResult = Luau::Parser::parse(reinterpret_cast<const char*>(Data), Size, names, allocator, options); // "static" here is to accelerate fuzzing process by only creating and populating the type environment once - static Luau::NullModuleResolver moduleResolver; - static Luau::InternalErrorReporter iceHandler; - static Luau::TypeChecker sharedEnv(&moduleResolver, &iceHandler); - static int once = (Luau::registerBuiltinGlobals(sharedEnv), 1); + static Luau::NullFileResolver fileResolver; + static Luau::NullConfigResolver configResolver; + static Luau::Frontend frontend{&fileResolver, &configResolver}; + static int once = (Luau::registerBuiltinGlobals(frontend), 1); (void)once; - static int once2 = (Luau::freeze(sharedEnv.globalTypes), 1); + static int once2 = (Luau::freeze(frontend.globals.globalTypes), 1); (void)once2; if (parseResult.errors.empty()) { + Luau::TypeChecker
typeck(frontend.globals.globalScope, &frontend.moduleResolver, frontend.builtinTypes, &frontend.iceHandler); + Luau::SourceModule module; module.root = parseResult.root; module.mode = Luau::Mode::Nonstrict; - Luau::TypeChecker typeck(&moduleResolver, &iceHandler); - typeck.globalScope = sharedEnv.globalScope; - try { typeck.check(module, Luau::Mode::Nonstrict); diff --git a/tests/AssemblyBuilderA64.test.cpp b/tests/AssemblyBuilderA64.test.cpp index a68932ba..1690c748 100644 --- a/tests/AssemblyBuilderA64.test.cpp +++ b/tests/AssemblyBuilderA64.test.cpp @@ -32,9 +32,9 @@ static std::string bytecodeAsArray(const std::vector<uint32_t>& code) class AssemblyBuilderA64Fixture { public: - bool check(void (*f)(AssemblyBuilderA64& build), std::vector<uint32_t> code, std::vector<uint32_t> data = {}) + bool check(void (*f)(AssemblyBuilderA64& build), std::vector<uint32_t> code, std::vector<uint32_t> data = {}, unsigned int features = 0) { - AssemblyBuilderA64 build(/* logText= */ false); + AssemblyBuilderA64 build(/* logText= */ false, features); f(build); @@ -285,6 +285,87 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "AddressOfLabel") // clang-format on } +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPBasic") +{ + SINGLE_COMPARE(fmov(d0, d1), 0x1E604020); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPMath") +{ + SINGLE_COMPARE(fabs(d1, d2), 0x1E60C041); + SINGLE_COMPARE(fadd(d1, d2, d3), 0x1E632841); + SINGLE_COMPARE(fdiv(d1, d2, d3), 0x1E631841); + SINGLE_COMPARE(fmul(d1, d2, d3), 0x1E630841); + SINGLE_COMPARE(fneg(d1, d2), 0x1E614041); + SINGLE_COMPARE(fsqrt(d1, d2), 0x1E61C041); + SINGLE_COMPARE(fsub(d1, d2, d3), 0x1E633841); + + SINGLE_COMPARE(frinta(d1, d2), 0x1E664041); + SINGLE_COMPARE(frintm(d1, d2), 0x1E654041); + SINGLE_COMPARE(frintp(d1, d2), 0x1E64C041); + + SINGLE_COMPARE(fcvtzs(w1, d2), 0x1E780041); + SINGLE_COMPARE(fcvtzs(x1, d2), 0x9E780041); + SINGLE_COMPARE(fcvtzu(w1, d2), 0x1E790041); + SINGLE_COMPARE(fcvtzu(x1, d2), 0x9E790041); + + SINGLE_COMPARE(scvtf(d1, w2), 0x1E620041); + SINGLE_COMPARE(scvtf(d1, x2), 0x9E620041); + SINGLE_COMPARE(ucvtf(d1, w2), 0x1E630041); + SINGLE_COMPARE(ucvtf(d1, x2), 0x9E630041); + + CHECK(check( + [](AssemblyBuilderA64& build) { + build.fjcvtzs(w1, d2); + }, + {0x1E7E0041}, {}, A64::Feature_JSCVT)); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPLoadStore") +{ + // address forms + SINGLE_COMPARE(ldr(d0, x1), 0xFD400020); + SINGLE_COMPARE(ldr(d0, mem(x1, 8)), 0xFD400420); + SINGLE_COMPARE(ldr(d0, mem(x1, x7)), 0xFC676820); + SINGLE_COMPARE(ldr(d0, mem(x1, -7)), 0xFC5F9020); + SINGLE_COMPARE(str(d0, x1), 0xFD000020); + SINGLE_COMPARE(str(d0, mem(x1, 8)), 0xFD000420); + SINGLE_COMPARE(str(d0, mem(x1, x7)), 0xFC276820); + SINGLE_COMPARE(str(d0, mem(x1, -7)), 0xFC1F9020); + + // load/store sizes + SINGLE_COMPARE(ldr(d0, x1), 0xFD400020); + SINGLE_COMPARE(ldr(q0, x1), 0x3DC00020); + SINGLE_COMPARE(str(d0, x1), 0xFD000020); + SINGLE_COMPARE(str(q0, x1), 0x3D800020); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPCompare") +{ + SINGLE_COMPARE(fcmp(d0, d1), 0x1E612000); + SINGLE_COMPARE(fcmpz(d1), 0x1E602028); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "AddressOffsetSize") +{ + SINGLE_COMPARE(ldr(w0, mem(x1, 16)), 0xB9401020); + SINGLE_COMPARE(ldr(x0, mem(x1, 16)), 0xF9400820); + SINGLE_COMPARE(ldr(d0, mem(x1, 16)), 0xFD400820); + SINGLE_COMPARE(ldr(q0, mem(x1, 16)), 0x3DC00420); + + SINGLE_COMPARE(str(w0, mem(x1, 16)), 0xB9001020); + SINGLE_COMPARE(str(x0, mem(x1, 16)), 0xF9000820); + SINGLE_COMPARE(str(d0, mem(x1, 16)), 0xFD000820); + SINGLE_COMPARE(str(q0, mem(x1, 16)),
0x3D800420); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "ConditionalSelect") +{ + SINGLE_COMPARE(csel(x0, x1, x2, ConditionA64::Equal), 0x9A820020); + SINGLE_COMPARE(csel(w0, w1, w2, ConditionA64::Equal), 0x1A820020); + SINGLE_COMPARE(fcsel(d0, d1, d2, ConditionA64::Equal), 0x1E620C20); +} + TEST_CASE("LogTest") { AssemblyBuilderA64 build(/* logText= */ true); @@ -309,6 +390,14 @@ TEST_CASE("LogTest") build.ldp(x0, x1, mem(x8, 8)); build.adr(x0, l); + build.fabs(d1, d2); + build.ldr(q1, x2); + + build.csel(x0, x1, x2, ConditionA64::Equal); + + build.fcmp(d0, d1); + build.fcmpz(d0); + build.setLabel(l); build.ret(); @@ -331,6 +420,11 @@ TEST_CASE("LogTest") cbz x7,.L1 ldp x0,x1,[x8,#8] adr x0,.L1 + fabs d1,d2 + ldr q1,[x2] + csel x0,x1,x2,eq + fcmp d0,d1 + fcmp d0,#0 .L1: ret )"; diff --git a/tests/Autocomplete.test.cpp b/tests/Autocomplete.test.cpp index aedb50ab..53dc99e1 100644 --- a/tests/Autocomplete.test.cpp +++ b/tests/Autocomplete.test.cpp @@ -2995,8 +2995,6 @@ TEST_CASE_FIXTURE(ACFixture, "autocomplete_string_singletons") TEST_CASE_FIXTURE(ACFixture, "string_singleton_as_table_key") { - ScopedFastFlag sff{"LuauCompleteTableKeysBetter", true}; - check(R"( type Direction = "up" | "down" diff --git a/tests/Compiler.test.cpp b/tests/Compiler.test.cpp index c9d0c01d..cabf1cce 100644 --- a/tests/Compiler.test.cpp +++ b/tests/Compiler.test.cpp @@ -4691,8 +4691,6 @@ RETURN R0 0 TEST_CASE("LoopUnrollCost") { - ScopedFastFlag sff("LuauCompileBuiltinArity", true); - ScopedFastInt sfis[] = { {"LuauCompileLoopUnrollThreshold", 25}, {"LuauCompileLoopUnrollThresholdMaxBoost", 300}, @@ -5962,8 +5960,6 @@ RETURN R2 1 TEST_CASE("InlineMultret") { - ScopedFastFlag sff("LuauCompileBuiltinArity", true); - // inlining a function in multret context is prohibited since we can't adjust L->top outside of CALL/GETVARARGS CHECK_EQ("\n" + compileFunction(R"( local function foo(a) @@ -6301,8 +6297,6 @@ RETURN R0 52 TEST_CASE("BuiltinFoldingProhibited") { - ScopedFastFlag sff("LuauCompileBuiltinArity", true); - CHECK_EQ("\n" + compileFunction(R"( return math.abs(), @@ -6905,8 +6899,6 @@ L3: RETURN R0 0 TEST_CASE("BuiltinArity") { - ScopedFastFlag sff("LuauCompileBuiltinArity", true); - // by default we can't assume that we know parameter/result count for builtins as they can be overridden at runtime CHECK_EQ("\n" + compileFunction(R"( return math.abs(unknown()) diff --git a/tests/Conformance.test.cpp b/tests/Conformance.test.cpp index 1072b95d..957d3271 100644 --- a/tests/Conformance.test.cpp +++ b/tests/Conformance.test.cpp @@ -504,7 +504,7 @@ TEST_CASE("Types") Luau::InternalErrorReporter iceHandler; Luau::BuiltinTypes builtinTypes; Luau::GlobalTypes globals{Luau::NotNull{&builtinTypes}}; - Luau::TypeChecker env(globals, &moduleResolver, Luau::NotNull{&builtinTypes}, &iceHandler); + Luau::TypeChecker env(globals.globalScope, &moduleResolver, Luau::NotNull{&builtinTypes}, &iceHandler); Luau::registerBuiltinGlobals(env, globals); Luau::freeze(globals.globalTypes); diff --git a/tests/ConstraintGraphBuilderFixture.cpp b/tests/ConstraintGraphBuilderFixture.cpp index cc239b7e..d34b86bd 100644 --- a/tests/ConstraintGraphBuilderFixture.cpp +++ b/tests/ConstraintGraphBuilderFixture.cpp @@ -31,8 +31,7 @@ void ConstraintGraphBuilderFixture::generateConstraints(const std::string& code) void ConstraintGraphBuilderFixture::solve(const std::string& code) { generateConstraints(code); - ConstraintSolver cs{NotNull{&normalizer}, NotNull{rootScope}, constraints, "MainModule", NotNull{mainModule->reduction.get()}, - 
NotNull(&moduleResolver), {}, &logger}; + ConstraintSolver cs{NotNull{&normalizer}, NotNull{rootScope}, constraints, "MainModule", NotNull(&moduleResolver), {}, &logger}; cs.run(); } diff --git a/tests/IrBuilder.test.cpp b/tests/IrBuilder.test.cpp index f4c9cdca..c1392c9d 100644 --- a/tests/IrBuilder.test.cpp +++ b/tests/IrBuilder.test.cpp @@ -42,7 +42,7 @@ public: f(a); build.beginBlock(a); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); }; template <typename F> @@ -56,10 +56,10 @@ public: f(a, b); build.beginBlock(a); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(b); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); }; void checkEq(IrOp instOp, const IrInst& inst) @@ -94,10 +94,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptCheckTag") build.inst(IrCmd::CHECK_TAG, tag1, build.constTag(0), fallback); IrOp tag2 = build.inst(IrCmd::LOAD_TAG, build.vmConst(5)); build.inst(IrCmd::CHECK_TAG, tag2, build.constTag(0), fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -107,10 +107,10 @@ bb_0: CHECK_TAG R2, tnil, bb_fallback_1 CHECK_TAG K5, tnil, bb_fallback_1 - LOP_RETURN 0u + RETURN 0u bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -123,7 +123,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptBinaryArith") IrOp opA = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(1)); IrOp opB = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(2)); build.inst(IrCmd::ADD_NUM, opA, opB); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -133,7 +133,7 @@ bb_0: %0 = LOAD_DOUBLE R1 %2 = ADD_NUM %0, R2 - LOP_RETURN 0u + RETURN 0u )"); } @@ -150,10 +150,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptEqTag1") build.inst(IrCmd::JUMP_EQ_TAG, opA, opB, trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -165,10 +165,10 @@ bb_0: JUMP_EQ_TAG R1, %1, bb_1, bb_2 bb_1: - LOP_RETURN 0u + RETURN 0u bb_2: - LOP_RETURN 0u + RETURN 0u )"); } @@ -186,10 +186,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptEqTag2") build.inst(IrCmd::JUMP_EQ_TAG, opA, opB, trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -203,10 +203,10 @@ bb_0: JUMP_EQ_TAG R2, %0, bb_1, bb_2 bb_1: - LOP_RETURN 0u + RETURN 0u bb_2: - LOP_RETURN 0u + RETURN 0u )"); } @@ -224,10 +224,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptEqTag3")
build.inst(IrCmd::JUMP_EQ_TAG, opA, build.constTag(0), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -241,10 +241,10 @@ bb_0: JUMP_EQ_TAG %2, tnil, bb_1, bb_2 bb_1: - LOP_RETURN 0u + RETURN 0u bb_2: - LOP_RETURN 0u + RETURN 0u )"); } @@ -261,10 +261,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptJumpCmpNum") build.inst(IrCmd::JUMP_CMP_NUM, opA, opB, trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -276,10 +276,10 @@ bb_0: JUMP_CMP_NUM R1, %1, bb_1, bb_2 bb_1: - LOP_RETURN 0u + RETURN 0u bb_2: - LOP_RETURN 0u + RETURN 0u )"); } @@ -317,7 +317,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "Numeric") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(0), build.inst(IrCmd::INT_TO_NUM, build.constInt(8))); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constantFold(); @@ -342,7 +342,7 @@ bb_0: STORE_INT R0, 1i STORE_INT R0, 0i STORE_DOUBLE R0, 8 - LOP_RETURN 0u + RETURN 0u )"); } @@ -373,25 +373,25 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN 1u + RETURN 1u bb_3: JUMP bb_5 bb_5: - LOP_RETURN 2u + RETURN 2u bb_6: JUMP bb_7 bb_7: - LOP_RETURN 1u + RETURN 1u bb_9: JUMP bb_11 bb_11: - LOP_RETURN 2u + RETURN 2u )"); } @@ -400,18 +400,18 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "NumToIndex") { withOneBlock([this](IrOp a) { build.inst(IrCmd::STORE_INT, build.vmReg(0), build.inst(IrCmd::TRY_NUM_TO_INDEX, build.constDouble(4), a)); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); withOneBlock([this](IrOp a) { build.inst(IrCmd::STORE_INT, build.vmReg(0), build.inst(IrCmd::TRY_NUM_TO_INDEX, build.constDouble(1.2), a)); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); withOneBlock([this](IrOp a) { IrOp nan = build.inst(IrCmd::DIV_NUM, build.constDouble(0.0), build.constDouble(0.0)); build.inst(IrCmd::STORE_INT, build.vmReg(0), build.inst(IrCmd::TRY_NUM_TO_INDEX, nan, a)); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); updateUseCounts(build.function); @@ -420,19 +420,19 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "NumToIndex") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: STORE_INT R0, 4i - LOP_RETURN 0u + RETURN 0u bb_2: JUMP bb_3 bb_3: - LOP_RETURN 1u + RETURN 1u bb_4: JUMP bb_5 bb_5: - LOP_RETURN 1u + RETURN 1u )"); } @@ -441,12 +441,12 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "Guards") { withOneBlock([this](IrOp a) { build.inst(IrCmd::CHECK_TAG, build.constTag(tnumber), build.constTag(tnumber), a); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); withOneBlock([this](IrOp a) { build.inst(IrCmd::CHECK_TAG, build.constTag(tnil), build.constTag(tnumber), a); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); 
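        // [editor's note] this guard compares a constant tnil against tnumber, so it can never pass;
        // constant folding therefore replaces the block body with a direct jump to the fallback,
        // which is exactly what the expected IR below shows (bb_2 jumping straight to bb_3)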
}); updateUseCounts(build.function); @@ -454,13 +454,13 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "Guards") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: - LOP_RETURN 0u + RETURN 0u bb_2: JUMP bb_3 bb_3: - LOP_RETURN 1u + RETURN 1u )"); } @@ -568,7 +568,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RememberTagsAndValues") build.inst(IrCmd::STORE_INT, build.vmReg(10), build.inst(IrCmd::LOAD_INT, build.vmReg(1))); build.inst(IrCmd::STORE_DOUBLE, build.vmReg(11), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(2))); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -593,7 +593,7 @@ bb_0: STORE_INT R10, %20 %22 = LOAD_DOUBLE R2 STORE_DOUBLE R11, %22 - LOP_RETURN 0u + RETURN 0u )"); } @@ -614,7 +614,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "PropagateThroughTvalue") build.inst(IrCmd::STORE_TAG, build.vmReg(3), build.inst(IrCmd::LOAD_TAG, build.vmReg(1))); build.inst(IrCmd::STORE_DOUBLE, build.vmReg(3), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(1))); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -627,7 +627,7 @@ bb_0: STORE_TVALUE R1, %2 STORE_TAG R3, tnumber STORE_DOUBLE R3, 0.5 - LOP_RETURN 0u + RETURN 0u )"); } @@ -641,10 +641,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipCheckTag") build.inst(IrCmd::STORE_TAG, build.vmReg(0), build.constTag(tnumber)); build.inst(IrCmd::CHECK_TAG, build.inst(IrCmd::LOAD_TAG, build.vmReg(0)), build.constTag(tnumber), fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -652,7 +652,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipCheckTag") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: STORE_TAG R0, tnumber - LOP_RETURN 0u + RETURN 0u )"); } @@ -671,7 +671,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipOncePerBlockChecks") build.inst(IrCmd::DO_LEN, build.vmReg(1), build.vmReg(2)); // Can make env unsafe build.inst(IrCmd::CHECK_SAFE_ENV); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -682,7 +682,7 @@ bb_0: CHECK_GC DO_LEN R1, R2 CHECK_SAFE_ENV - LOP_RETURN 0u + RETURN 0u )"); } @@ -707,10 +707,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RememberTableState") build.inst(IrCmd::CHECK_NO_METATABLE, table, fallback); build.inst(IrCmd::CHECK_READONLY, table, fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -723,10 +723,10 @@ bb_0: DO_LEN R1, R2 CHECK_NO_METATABLE %0, bb_fallback_1 CHECK_READONLY %0, bb_fallback_1 - LOP_RETURN 0u + RETURN 0u bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -742,7 +742,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipUselessBarriers") build.inst(IrCmd::BARRIER_TABLE_FORWARD, table, build.vmReg(0)); IrOp something = build.inst(IrCmd::LOAD_POINTER, build.vmReg(2)); build.inst(IrCmd::BARRIER_OBJ, something, build.vmReg(0)); 
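    // [editor's note] R0 was tagged tnumber just above, and numbers are not collectable, so both
    // write barriers are provably dead; the optimized IR below keeps only the tag store and the return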
- build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -750,7 +750,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipUselessBarriers") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: STORE_TAG R0, tnumber - LOP_RETURN 0u + RETURN 0u )"); } @@ -773,7 +773,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "ConcatInvalidation") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(6), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(2))); build.inst(IrCmd::STORE_DOUBLE, build.vmReg(7), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(3))); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -792,7 +792,7 @@ bb_0: %9 = LOAD_DOUBLE R2 STORE_DOUBLE R6, %9 STORE_DOUBLE R7, 2 - LOP_RETURN 0u + RETURN 0u )"); } @@ -819,10 +819,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "BuiltinFastcallsMayInvalidateMemory") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(1), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(0))); // At least R0 wasn't touched - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -837,10 +837,10 @@ bb_0: CHECK_NO_METATABLE %1, bb_fallback_1 CHECK_READONLY %1, bb_fallback_1 STORE_DOUBLE R1, 0.5 - LOP_RETURN 0u + RETURN 0u bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -855,7 +855,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RedundantStoreCheckConstantType") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(0), build.constDouble(0.5)); build.inst(IrCmd::STORE_INT, build.vmReg(0), build.constInt(10)); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -865,7 +865,7 @@ bb_0: STORE_INT R0, 10i STORE_DOUBLE R0, 0.5 STORE_INT R0, 10i - LOP_RETURN 0u + RETURN 0u )"); } @@ -882,10 +882,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TagCheckPropagation") build.inst(IrCmd::CHECK_TAG, unknown, build.constTag(tnumber), fallback); build.inst(IrCmd::CHECK_TAG, unknown, build.constTag(tnumber), fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -894,10 +894,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TagCheckPropagation") bb_0: %0 = LOAD_TAG R0 CHECK_TAG %0, tnumber, bb_fallback_1 - LOP_RETURN 0u + RETURN 0u bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -914,10 +914,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TagCheckPropagationConflicting") build.inst(IrCmd::CHECK_TAG, unknown, build.constTag(tnumber), fallback); build.inst(IrCmd::CHECK_TAG, unknown, build.constTag(tnil), fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -929,7 +929,7 @@ bb_0: JUMP bb_fallback_1 bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -947,13 
+947,13 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TruthyTestRemoval") build.inst(IrCmd::JUMP_IF_TRUTHY, build.vmReg(1), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(3)); + build.inst(IrCmd::RETURN, build.constUint(3)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -965,10 +965,10 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN 1u + RETURN 1u bb_fallback_3: - LOP_RETURN 3u + RETURN 3u )"); } @@ -986,13 +986,13 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FalsyTestRemoval") build.inst(IrCmd::JUMP_IF_FALSY, build.vmReg(1), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(3)); + build.inst(IrCmd::RETURN, build.constUint(3)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1004,10 +1004,10 @@ bb_0: JUMP bb_2 bb_2: - LOP_RETURN 2u + RETURN 2u bb_fallback_3: - LOP_RETURN 3u + RETURN 3u )"); } @@ -1024,10 +1024,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TagEqRemoval") build.inst(IrCmd::JUMP_EQ_TAG, tag, build.constTag(tnumber), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1039,7 +1039,7 @@ bb_0: JUMP bb_2 bb_2: - LOP_RETURN 2u + RETURN 2u )"); } @@ -1056,10 +1056,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "IntEqRemoval") build.inst(IrCmd::JUMP_EQ_INT, value, build.constInt(5), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1070,7 +1070,7 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -1087,10 +1087,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "NumCmpRemoval") build.inst(IrCmd::JUMP_CMP_NUM, value, build.constDouble(8.0), build.cond(IrCondition::Greater), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1101,7 +1101,7 @@ bb_0: JUMP bb_2 bb_2: - LOP_RETURN 2u + RETURN 2u )"); } @@ -1118,7 +1118,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "DataFlowsThroughDirectJumpToUniqueSuccessor build.beginBlock(block2); build.inst(IrCmd::STORE_TAG, build.vmReg(1), build.inst(IrCmd::LOAD_TAG, build.vmReg(0))); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); 
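    // [editor's note] the tag of R0 is known from the store earlier in the block, so the
    // JUMP_EQ_TAG against tnumber folds into an unconditional jump to the false block (bb_2),
    // as the expected IR below shows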
constPropInBlockChains(build); @@ -1130,7 +1130,7 @@ bb_0: bb_1: STORE_TAG R1, tnumber - LOP_RETURN 1u + RETURN 1u )"); } @@ -1148,7 +1148,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "DataDoesNotFlowThroughDirectJumpToNonUnique build.beginBlock(block2); build.inst(IrCmd::STORE_TAG, build.vmReg(1), build.inst(IrCmd::LOAD_TAG, build.vmReg(0))); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(block3); build.inst(IrCmd::JUMP, block2); @@ -1164,7 +1164,7 @@ bb_0: bb_1: %2 = LOAD_TAG R0 STORE_TAG R1, %2 - LOP_RETURN 1u + RETURN 1u bb_2: JUMP bb_1 @@ -1183,7 +1183,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "EntryBlockUseRemoval") build.inst(IrCmd::JUMP_IF_TRUTHY, build.vmReg(0), exit, repeat); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(repeat); build.inst(IrCmd::INTERRUPT, build.constUint(0)); @@ -1198,7 +1198,7 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1211,14 +1211,14 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RecursiveSccUseRemoval1") IrOp repeat = build.block(IrBlockKind::Internal); build.beginBlock(entry); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(block); build.inst(IrCmd::STORE_TAG, build.vmReg(0), build.constTag(tnumber)); build.inst(IrCmd::JUMP_IF_TRUTHY, build.vmReg(0), exit, repeat); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(repeat); build.inst(IrCmd::INTERRUPT, build.constUint(0)); @@ -1229,14 +1229,14 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RecursiveSccUseRemoval1") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: - LOP_RETURN R0, 0i + RETURN R0, 0i bb_1: STORE_TAG R0, tnumber JUMP bb_2 bb_2: - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1253,14 +1253,14 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RecursiveSccUseRemoval2") build.inst(IrCmd::JUMP_EQ_INT, build.constInt(0), build.constInt(1), block, exit1); build.beginBlock(exit1); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(block); build.inst(IrCmd::STORE_TAG, build.vmReg(0), build.constTag(tnumber)); build.inst(IrCmd::JUMP_IF_TRUTHY, build.vmReg(0), exit2, repeat); build.beginBlock(exit2); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(repeat); build.inst(IrCmd::INTERRUPT, build.constUint(0)); @@ -1274,14 +1274,14 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN R0, 0i + RETURN R0, 0i bb_2: STORE_TAG R0, tnumber JUMP bb_3 bb_3: - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1322,7 +1322,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SimplePathExtraction") build.inst(IrCmd::JUMP, block4); build.beginBlock(block4); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1350,10 +1350,10 @@ bb_4: JUMP bb_5 bb_5: - LOP_RETURN R0, 0i + RETURN R0, 0i bb_linear_6: - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1393,11 +1393,11 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "NoPathExtractionForBlocksWithLiveOutValues" 
build.beginBlock(block4a); build.inst(IrCmd::STORE_TAG, build.vmReg(0), tag3a); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(block4b); build.inst(IrCmd::STORE_TAG, build.vmReg(0), tag3a); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1427,11 +1427,11 @@ bb_4: bb_5: STORE_TAG R0, %10 - LOP_RETURN R0, 0i + RETURN R0, 0i bb_6: STORE_TAG R0, %10 - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1488,7 +1488,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SimpleDiamond") build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(2), build.constInt(2)); + build.inst(IrCmd::RETURN, build.vmReg(2), build.constInt(2)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1522,7 +1522,7 @@ bb_2: bb_3: ; predecessors: bb_1, bb_2 ; in regs: R2, R3 - LOP_RETURN R2, 2i + RETURN R2, 2i )"); } @@ -1534,11 +1534,11 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "ImplicitFixedRegistersInVarargCall") build.beginBlock(entry); build.inst(IrCmd::FALLBACK_GETVARARGS, build.constUint(0), build.vmReg(3), build.constInt(-1)); - build.inst(IrCmd::LOP_CALL, build.vmReg(0), build.constInt(-1), build.constInt(5)); + build.inst(IrCmd::CALL, build.vmReg(0), build.constInt(-1), build.constInt(5)); build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(5)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(5)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1549,13 +1549,13 @@ bb_0: ; in regs: R0, R1, R2 ; out regs: R0, R1, R2, R3, R4 FALLBACK_GETVARARGS 0u, R3, -1i - LOP_CALL R0, -1i, 5i + CALL R0, -1i, 5i JUMP bb_1 bb_1: ; predecessors: bb_0 ; in regs: R0, R1, R2, R3, R4 - LOP_RETURN R0, 5i + RETURN R0, 5i )"); } @@ -1573,7 +1573,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "ExplicitUseOfRegisterInVarargSequence") build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(-1)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(-1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1590,7 +1590,7 @@ bb_0: bb_1: ; predecessors: bb_0 ; in regs: R0... - LOP_RETURN R0, -1i + RETURN R0, -1i )"); } @@ -1601,12 +1601,12 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "VariadicSequenceRestart") IrOp exit = build.block(IrBlockKind::Internal); build.beginBlock(entry); - build.inst(IrCmd::LOP_CALL, build.vmReg(1), build.constInt(0), build.constInt(-1)); - build.inst(IrCmd::LOP_CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); + build.inst(IrCmd::CALL, build.vmReg(1), build.constInt(0), build.constInt(-1)); + build.inst(IrCmd::CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(-1)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(-1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1616,14 +1616,14 @@ bb_0: ; successors: bb_1 ; in regs: R0, R1 ; out regs: R0... - LOP_CALL R1, 0i, -1i - LOP_CALL R0, -1i, -1i + CALL R1, 0i, -1i + CALL R0, -1i, -1i JUMP bb_1 bb_1: ; predecessors: bb_0 ; in regs: R0... 
- LOP_RETURN R0, -1i + RETURN R0, -1i )"); } @@ -1637,15 +1637,15 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FallbackDoesNotFlowUp") build.beginBlock(entry); build.inst(IrCmd::FALLBACK_GETVARARGS, build.constUint(0), build.vmReg(1), build.constInt(-1)); build.inst(IrCmd::CHECK_TAG, build.inst(IrCmd::LOAD_TAG, build.vmReg(0)), build.constTag(tnumber), fallback); - build.inst(IrCmd::LOP_CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); + build.inst(IrCmd::CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); build.inst(IrCmd::JUMP, exit); build.beginBlock(fallback); - build.inst(IrCmd::LOP_CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); + build.inst(IrCmd::CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(-1)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(-1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1658,7 +1658,7 @@ bb_0: FALLBACK_GETVARARGS 0u, R1, -1i %1 = LOAD_TAG R0 CHECK_TAG %1, tnumber, bb_fallback_1 - LOP_CALL R0, -1i, -1i + CALL R0, -1i, -1i JUMP bb_2 bb_fallback_1: @@ -1666,13 +1666,13 @@ bb_fallback_1: ; successors: bb_2 ; in regs: R0, R1... ; out regs: R0... - LOP_CALL R0, -1i, -1i + CALL R0, -1i, -1i JUMP bb_2 bb_2: ; predecessors: bb_0, bb_fallback_1 ; in regs: R0... - LOP_RETURN R0, -1i + RETURN R0, -1i )"); } @@ -1697,7 +1697,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "VariadicSequencePeeling") build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(2), build.constInt(-1)); + build.inst(IrCmd::RETURN, build.vmReg(2), build.constInt(-1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1732,7 +1732,7 @@ bb_2: bb_3: ; predecessors: bb_1, bb_2 ; in regs: R2... 
- LOP_RETURN R2, -1i + RETURN R2, -1i )"); } @@ -1746,11 +1746,11 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "BuiltinVariadicStart") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(1), build.constDouble(1.0)); build.inst(IrCmd::STORE_DOUBLE, build.vmReg(2), build.constDouble(2.0)); build.inst(IrCmd::ADJUST_STACK_TO_REG, build.vmReg(2), build.constInt(1)); - build.inst(IrCmd::LOP_CALL, build.vmReg(1), build.constInt(-1), build.constInt(1)); + build.inst(IrCmd::CALL, build.vmReg(1), build.constInt(-1), build.constInt(1)); build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(2)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(2)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1763,13 +1763,13 @@ bb_0: STORE_DOUBLE R1, 1 STORE_DOUBLE R2, 2 ADJUST_STACK_TO_REG R2, 1i - LOP_CALL R1, -1i, 1i + CALL R1, -1i, 1i JUMP bb_1 bb_1: ; predecessors: bb_0 ; in regs: R0, R1 - LOP_RETURN R0, 2i + RETURN R0, 2i )"); } @@ -1781,7 +1781,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SetTable") build.beginBlock(entry); build.inst(IrCmd::SET_TABLE, build.vmReg(0), build.vmReg(1), build.constUint(1)); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(1)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1790,7 +1790,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SetTable") bb_0: ; in regs: R0, R1 SET_TABLE R0, R1, 1u - LOP_RETURN R0, 1i + RETURN R0, 1i )"); } diff --git a/tests/IrCallWrapperX64.test.cpp b/tests/IrCallWrapperX64.test.cpp new file mode 100644 index 00000000..8c7b1393 --- /dev/null +++ b/tests/IrCallWrapperX64.test.cpp @@ -0,0 +1,484 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#include "Luau/IrCallWrapperX64.h" +#include "Luau/IrRegAllocX64.h" + +#include "doctest.h" + +using namespace Luau::CodeGen; +using namespace Luau::CodeGen::X64; + +class IrCallWrapperX64Fixture +{ +public: + IrCallWrapperX64Fixture() + : build(/* logText */ true, ABIX64::Windows) + , regs(function) + , callWrap(regs, build, ~0u) + { + } + + void checkMatch(std::string expected) + { + regs.assertAllFree(); + + build.finalize(); + + CHECK("\n" + build.text == expected); + } + + AssemblyBuilderX64 build; + IrFunction function; + IrRegAllocX64 regs; + IrCallWrapperX64 callWrap; + + // Tests rely on these to force interference between registers + static constexpr RegisterX64 rArg1 = rcx; + static constexpr RegisterX64 rArg1d = ecx; + static constexpr RegisterX64 rArg2 = rdx; + static constexpr RegisterX64 rArg2d = edx; + static constexpr RegisterX64 rArg3 = r8; + static constexpr RegisterX64 rArg3d = r8d; + static constexpr RegisterX64 rArg4 = r9; + static constexpr RegisterX64 rArg4d = r9d; +}; + +TEST_SUITE_BEGIN("IrCallWrapperX64"); + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "SimpleRegs") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rax)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, tmp1); + callWrap.addArgument(SizeX64::qword, tmp2); // Already in its place + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rcx,rax + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "TrickyUse1") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, tmp1.reg); // Already in its place + callWrap.addArgument(SizeX64::qword, tmp1.release()); + callWrap.call(qword[r12]); + + 
checkMatch(R"( + mov rdx,rcx + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "TrickyUse2") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg]); + callWrap.addArgument(SizeX64::qword, tmp1.release()); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rdx,rcx + mov rcx,qword ptr [rcx] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "SimpleMemImm") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rax)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rsi)}; + callWrap.addArgument(SizeX64::dword, 32); + callWrap.addArgument(SizeX64::dword, -1); + callWrap.addArgument(SizeX64::qword, qword[r14 + 32]); + callWrap.addArgument(SizeX64::qword, qword[tmp1.release() + tmp2.release()]); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov r8,qword ptr [r14+020h] + mov r9,qword ptr [rax+rsi] + mov ecx,20h + mov edx,FFFFFFFFh + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "SimpleStackArgs") +{ + ScopedRegX64 tmp{regs, regs.takeReg(rax)}; + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.addArgument(SizeX64::qword, qword[r14 + 16]); + callWrap.addArgument(SizeX64::qword, qword[r14 + 32]); + callWrap.addArgument(SizeX64::qword, qword[r14 + 48]); + callWrap.addArgument(SizeX64::dword, 1); + callWrap.addArgument(SizeX64::qword, qword[r13]); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rdx,qword ptr [r13] + mov qword ptr [rsp+028h],rdx + mov rcx,rax + mov rdx,qword ptr [r14+010h] + mov r8,qword ptr [r14+020h] + mov r9,qword ptr [r14+030h] + mov dword ptr [rsp+020h],1 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "FixedRegisters") +{ + callWrap.addArgument(SizeX64::dword, 1); + callWrap.addArgument(SizeX64::qword, 2); + callWrap.addArgument(SizeX64::qword, 3); + callWrap.addArgument(SizeX64::qword, 4); + callWrap.addArgument(SizeX64::qword, r14); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov qword ptr [rsp+020h],r14 + mov ecx,1 + mov rdx,2 + mov r8,3 + mov r9,4 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "EasyInterference") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rdi)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rsi)}; + ScopedRegX64 tmp3{regs, regs.takeReg(rArg2)}; + ScopedRegX64 tmp4{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, tmp1); + callWrap.addArgument(SizeX64::qword, tmp2); + callWrap.addArgument(SizeX64::qword, tmp3); + callWrap.addArgument(SizeX64::qword, tmp4); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov r8,rdx + mov rdx,rsi + mov r9,rcx + mov rcx,rdi + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "FakeInterference") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.release() + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp2.release() + 8]); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rcx,qword ptr [rcx+8] + mov rdx,qword ptr [rdx+8] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardInterferenceInt") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg4)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg3)}; + ScopedRegX64 tmp3{regs, regs.takeReg(rArg2)}; + ScopedRegX64 tmp4{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, tmp1); + callWrap.addArgument(SizeX64::qword, tmp2); + callWrap.addArgument(SizeX64::qword, tmp3); + callWrap.addArgument(SizeX64::qword, tmp4); 
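+ // every argument register already holds a different argument (rcx<->r9 and rdx<->r8 form swap cycles), forcing moves through a scratch register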
+ callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,r9 + mov r9,rcx + mov rcx,rax + mov rax,r8 + mov r8,rdx + mov rdx,rax + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardInterferenceInt2") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg4d)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg3d)}; + ScopedRegX64 tmp3{regs, regs.takeReg(rArg2d)}; + ScopedRegX64 tmp4{regs, regs.takeReg(rArg1d)}; + callWrap.addArgument(SizeX64::dword, tmp1); + callWrap.addArgument(SizeX64::dword, tmp2); + callWrap.addArgument(SizeX64::dword, tmp3); + callWrap.addArgument(SizeX64::dword, tmp4); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov eax,r9d + mov r9d,ecx + mov ecx,eax + mov eax,r8d + mov r8d,edx + mov edx,eax + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardInterferenceFp") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(xmm1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(xmm0)}; + callWrap.addArgument(SizeX64::xmmword, tmp1); + callWrap.addArgument(SizeX64::xmmword, tmp2); + callWrap.call(qword[r12]); + + checkMatch(R"( + vmovsd xmm2,xmm1,xmm1 + vmovsd xmm1,xmm0,xmm0 + vmovsd xmm0,xmm2,xmm2 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardInterferenceBoth") +{ + ScopedRegX64 int1{regs, regs.takeReg(rArg2)}; + ScopedRegX64 int2{regs, regs.takeReg(rArg1)}; + ScopedRegX64 fp1{regs, regs.takeReg(xmm3)}; + ScopedRegX64 fp2{regs, regs.takeReg(xmm2)}; + callWrap.addArgument(SizeX64::qword, int1); + callWrap.addArgument(SizeX64::qword, int2); + callWrap.addArgument(SizeX64::xmmword, fp1); + callWrap.addArgument(SizeX64::xmmword, fp2); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,rdx + mov rdx,rcx + mov rcx,rax + vmovsd xmm0,xmm3,xmm3 + vmovsd xmm3,xmm2,xmm2 + vmovsd xmm2,xmm0,xmm0 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "FakeMultiuseInterferenceMem") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp2.reg + 16]); + tmp1.release(); + tmp2.release(); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rcx,qword ptr [rcx+rdx+8] + mov rdx,qword ptr [rdx+010h] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardMultiuseInterferenceMem1") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + 16]); + tmp1.release(); + tmp2.release(); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,rcx + mov rcx,qword ptr [rax+rdx+8] + mov rdx,qword ptr [rax+010h] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardMultiuseInterferenceMem2") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 16]); + tmp1.release(); + tmp2.release(); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,rcx + mov rcx,qword ptr [rax+rdx+8] + mov rdx,qword ptr [rax+rdx+010h] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardMultiuseInterferenceMem3") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg3)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + ScopedRegX64 tmp3{regs, 
regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp2.reg + tmp3.reg + 16]); + callWrap.addArgument(SizeX64::qword, qword[tmp3.reg + tmp1.reg + 16]); + tmp1.release(); + tmp2.release(); + tmp3.release(); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,r8 + mov r8,qword ptr [rcx+rax+010h] + mov rbx,rdx + mov rdx,qword ptr [rbx+rcx+010h] + mov rcx,qword ptr [rax+rbx+8] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "InterferenceWithCallArg1") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + 8]); + callWrap.call(qword[tmp1.release() + 16]); + + checkMatch(R"( + mov rax,rcx + mov rcx,qword ptr [rax+8] + call qword ptr [rax+010h] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "InterferenceWithCallArg2") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, tmp2); + callWrap.call(qword[tmp1.release() + 16]); + + checkMatch(R"( + mov rax,rcx + mov rcx,rdx + call qword ptr [rax+010h] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "InterferenceWithCallArg3") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, tmp1.reg); + callWrap.call(qword[tmp1.release() + 16]); + + checkMatch(R"( + call qword ptr [rcx+010h] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "WithLastIrInstUse1") +{ + IrInst irInst1; + IrOp irOp1 = {IrOpKind::Inst, 0}; + irInst1.regX64 = regs.takeReg(xmm0); + irInst1.lastUse = 1; + function.instructions.push_back(irInst1); + callWrap.instIdx = irInst1.lastUse; + + callWrap.addArgument(SizeX64::xmmword, irInst1.regX64, irOp1); // Already in its place + callWrap.addArgument(SizeX64::xmmword, qword[r12 + 8]); + callWrap.call(qword[r12]); + + checkMatch(R"( + vmovsd xmm1,qword ptr [r12+8] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "WithLastIrInstUse2") +{ + IrInst irInst1; + IrOp irOp1 = {IrOpKind::Inst, 0}; + irInst1.regX64 = regs.takeReg(xmm0); + irInst1.lastUse = 1; + function.instructions.push_back(irInst1); + callWrap.instIdx = irInst1.lastUse; + + callWrap.addArgument(SizeX64::xmmword, qword[r12 + 8]); + callWrap.addArgument(SizeX64::xmmword, irInst1.regX64, irOp1); + callWrap.call(qword[r12]); + + checkMatch(R"( + vmovsd xmm1,xmm0,xmm0 + vmovsd xmm0,qword ptr [r12+8] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "WithLastIrInstUse3") +{ + IrInst irInst1; + IrOp irOp1 = {IrOpKind::Inst, 0}; + irInst1.regX64 = regs.takeReg(xmm0); + irInst1.lastUse = 1; + function.instructions.push_back(irInst1); + callWrap.instIdx = irInst1.lastUse; + + callWrap.addArgument(SizeX64::xmmword, irInst1.regX64, irOp1); + callWrap.addArgument(SizeX64::xmmword, irInst1.regX64, irOp1); + callWrap.call(qword[r12]); + + checkMatch(R"( + vmovsd xmm1,xmm0,xmm0 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "WithLastIrInstUse4") +{ + IrInst irInst1; + IrOp irOp1 = {IrOpKind::Inst, 0}; + irInst1.regX64 = regs.takeReg(rax); + irInst1.lastUse = 1; + function.instructions.push_back(irInst1); + callWrap.instIdx = irInst1.lastUse; + + ScopedRegX64 tmp{regs, regs.takeReg(rdx)}; + callWrap.addArgument(SizeX64::qword, r15); + callWrap.addArgument(SizeX64::qword, irInst1.regX64, irOp1); + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rcx,r15 + 
mov r8,rdx
+ mov rdx,rax
+ call qword ptr [r12]
+)");
+}
+
+TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "ExtraCoverage")
+{
+ ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)};
+ ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)};
+ callWrap.addArgument(SizeX64::qword, addr[r12 + 8]);
+ callWrap.addArgument(SizeX64::qword, addr[r12 + 16]);
+ callWrap.addArgument(SizeX64::xmmword, xmmword[r13]);
+ callWrap.call(qword[tmp1.release() + tmp2.release()]);
+
+ checkMatch(R"(
+ vmovups xmm2,xmmword ptr [r13]
+ mov rax,rcx
+ lea rcx,none ptr [r12+8]
+ mov rbx,rdx
+ lea rdx,none ptr [r12+010h]
+ call qword ptr [rax+rbx]
+)");
+}
+
+TEST_SUITE_END();
diff --git a/tests/Lexer.test.cpp b/tests/Lexer.test.cpp
index 7fcc1e54..78d1389a 100644
--- a/tests/Lexer.test.cpp
+++ b/tests/Lexer.test.cpp
@@ -157,8 +157,6 @@ TEST_CASE("string_interpolation_basic")
TEST_CASE("string_interpolation_full")
{
- ScopedFastFlag sff("LuauFixInterpStringMid", true);
-
const std::string testInput = R"(`foo {"bar"} {"baz"} end`)";
Luau::Allocator alloc;
AstNameTable table(alloc);
diff --git a/tests/Linter.test.cpp b/tests/Linter.test.cpp
index 0f134616..8bef5922 100644
--- a/tests/Linter.test.cpp
+++ b/tests/Linter.test.cpp
@@ -1444,8 +1444,6 @@ TEST_CASE_FIXTURE(Fixture, "LintHygieneUAF")
TEST_CASE_FIXTURE(BuiltinsFixture, "DeprecatedApiTyped")
{
- ScopedFastFlag sff("LuauImproveDeprecatedApiLint", true);
-
unfreeze(frontend.globals.globalTypes);
TypeId instanceType = frontend.globals.globalTypes.addType(ClassType{"Instance", {}, std::nullopt, std::nullopt, {}, {}, "Test"});
persist(instanceType);
@@ -1496,8 +1494,6 @@ end
TEST_CASE_FIXTURE(BuiltinsFixture, "DeprecatedApiUntyped")
{
- ScopedFastFlag sff("LuauImproveDeprecatedApiLint", true);
-
if (TableType* ttv = getMutable<TableType>(getGlobalBinding(frontend.globals, "table")))
{
ttv->props["foreach"].deprecated = true;
diff --git a/tests/Normalize.test.cpp b/tests/Normalize.test.cpp
index a495ee23..4378bab8 100644
--- a/tests/Normalize.test.cpp
+++ b/tests/Normalize.test.cpp
@@ -470,7 +470,6 @@ TEST_SUITE_END();
struct NormalizeFixture : Fixture
{
- ScopedFastFlag sff1{"LuauNegatedFunctionTypes", true};
ScopedFastFlag sff2{"LuauNegatedClassTypes", true};
TypeArena arena;
diff --git a/tests/Parser.test.cpp b/tests/Parser.test.cpp
index 9ff16d16..ef5aabbe 100644
--- a/tests/Parser.test.cpp
+++ b/tests/Parser.test.cpp
@@ -1040,8 +1040,6 @@ TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_call_without_parens")
TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_without_expression")
{
- ScopedFastFlag sff("LuauFixInterpStringMid", true);
-
try
{
parse(R"(
diff --git a/tests/TypeInfer.aliases.test.cpp b/tests/TypeInfer.aliases.test.cpp
index 022abea0..52de15c7 100644
--- a/tests/TypeInfer.aliases.test.cpp
+++ b/tests/TypeInfer.aliases.test.cpp
@@ -1014,4 +1014,34 @@ TEST_CASE_FIXTURE(Fixture, "another_thing_from_roact")
LUAU_REQUIRE_NO_ERRORS(result);
}
+/*
+ * It is sometimes possible for type alias resolution to produce a TypeId that
+ * belongs to a different module.
+
+ * We must not mutate any fields of the resulting type when this happens. The
+ * memory has been frozen.
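+ *
+ * The test below exercises this case: module B's `Object` alias resolves
+ * directly to the TypeId exported by module A, which has already been frozen.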
+ */
+TEST_CASE_FIXTURE(BuiltinsFixture, "alias_expands_to_bare_reference_to_imported_type")
+{
+ fileResolver.source["game/A"] = R"(
+ --!strict
+ export type Object = {[string]: any}
+ return {}
+ )";
+
+ fileResolver.source["game/B"] = R"(
+ local A = require(script.Parent.A)
+
+ type Object = A.Object
+ type ReadOnly<T> = T
+
+ local function f(): ReadOnly<Object>
+ return nil :: any
+ end
+ )";
+
+ CheckResult result = frontend.check("game/B");
+ LUAU_REQUIRE_NO_ERRORS(result);
+}
+
TEST_SUITE_END();
diff --git a/tests/TypeInfer.functions.test.cpp b/tests/TypeInfer.functions.test.cpp
index c7f9684b..f1d42c6a 100644
--- a/tests/TypeInfer.functions.test.cpp
+++ b/tests/TypeInfer.functions.test.cpp
@@ -1784,7 +1784,6 @@ z = y -- Not OK, so the line is colorable
TEST_CASE_FIXTURE(Fixture, "function_is_supertype_of_concrete_functions")
{
- ScopedFastFlag sff{"LuauNegatedFunctionTypes", true};
registerHiddenTypes(&frontend);
CheckResult result = check(R"(
@@ -1803,7 +1802,6 @@ TEST_CASE_FIXTURE(Fixture, "function_is_supertype_of_concrete_functions")
TEST_CASE_FIXTURE(Fixture, "concrete_functions_are_not_supertypes_of_function")
{
- ScopedFastFlag sff{"LuauNegatedFunctionTypes", true};
registerHiddenTypes(&frontend);
CheckResult result = check(R"(
@@ -1824,7 +1822,6 @@ TEST_CASE_FIXTURE(Fixture, "concrete_functions_are_not_supertypes_of_function")
TEST_CASE_FIXTURE(Fixture, "other_things_are_not_related_to_function")
{
- ScopedFastFlag sff{"LuauNegatedFunctionTypes", true};
registerHiddenTypes(&frontend);
CheckResult result = check(R"(
diff --git a/tests/TypeInfer.loops.test.cpp b/tests/TypeInfer.loops.test.cpp
index 511cbc76..7a134358 100644
--- a/tests/TypeInfer.loops.test.cpp
+++ b/tests/TypeInfer.loops.test.cpp
@@ -707,4 +707,26 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "cli_68448_iterators_need_not_accept_nil")
CHECK(toString(requireType("makeEnum"), {true}) == "<a>({a}) -> {| [a]: a |}");
}
+TEST_CASE_FIXTURE(Fixture, "iterate_over_free_table")
+{
+ CheckResult result = check(R"(
+ function print(x) end
+
+ function dump(tbl)
+ print(tbl.whatever)
+ for k, v in tbl do
+ print(k)
+ print(v)
+ end
+ end
+ )");
+
+ LUAU_REQUIRE_ERROR_COUNT(1, result);
+
+ GenericError* ge = get<GenericError>(result.errors[0]);
+ REQUIRE(ge);
+
+ CHECK("Cannot iterate over a table without indexer" == ge->message);
+}
+
TEST_SUITE_END();
diff --git a/tests/TypeInfer.oop.test.cpp b/tests/TypeInfer.oop.test.cpp
index eb4937fd..f2b3d055 100644
--- a/tests/TypeInfer.oop.test.cpp
+++ b/tests/TypeInfer.oop.test.cpp
@@ -381,4 +381,29 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "react_style_oo")
CHECK("string" == toString(requireType("hello")));
}
+TEST_CASE_FIXTURE(BuiltinsFixture, "cycle_between_object_constructor_and_alias")
+{
+ CheckResult result = check(R"(
+ local T = {}
+ T.__index = T
+
+ function T.new(): T
+ return setmetatable({}, T)
+ end
+
+ export type T = typeof(T.new())
+
+ return T
+ )");
+
+ LUAU_REQUIRE_NO_ERRORS(result);
+
+ auto module = getMainModule();
+
+ REQUIRE(module->exportedTypeBindings.count("T"));
+
+ TypeId aliasType = module->exportedTypeBindings["T"].type;
+ CHECK_MESSAGE(get<MetatableType>(follow(aliasType)), "Expected metatable type but got: " << toString(aliasType));
+}
+
TEST_SUITE_END();
diff --git a/tests/TypeInfer.operators.test.cpp b/tests/TypeInfer.operators.test.cpp
index 8c289c7b..174bc310 100644
--- a/tests/TypeInfer.operators.test.cpp
+++ b/tests/TypeInfer.operators.test.cpp
@@ -860,8 +860,6 @@ TEST_CASE_FIXTURE(Fixture, "operator_eq_operands_are_not_subtypes_of_each_other_
TEST_CASE_FIXTURE(Fixture, "operator_eq_completely_incompatible")
"operator_eq_completely_incompatible") { - ScopedFastFlag sff{"LuauIntersectionTestForEquality", true}; - CheckResult result = check(R"( local a: string | number = "hi" local b: {x: string}? = {x = "bye"} @@ -970,8 +968,6 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "expected_types_through_binary_or") TEST_CASE_FIXTURE(ClassFixture, "unrelated_classes_cannot_be_compared") { - ScopedFastFlag sff{"LuauIntersectionTestForEquality", true}; - CheckResult result = check(R"( local a = BaseClass.New() local b = UnrelatedClass.New() @@ -984,8 +980,6 @@ TEST_CASE_FIXTURE(ClassFixture, "unrelated_classes_cannot_be_compared") TEST_CASE_FIXTURE(Fixture, "unrelated_primitives_cannot_be_compared") { - ScopedFastFlag sff{"LuauIntersectionTestForEquality", true}; - CheckResult result = check(R"( local c = 5 == true )"); diff --git a/tests/TypeInfer.provisional.test.cpp b/tests/TypeInfer.provisional.test.cpp index 30f77d68..38e7e2f3 100644 --- a/tests/TypeInfer.provisional.test.cpp +++ b/tests/TypeInfer.provisional.test.cpp @@ -176,8 +176,6 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "error_on_eq_metamethod_returning_a_type_othe // We need refine both operands as `never` in the `==` branch. TEST_CASE_FIXTURE(Fixture, "lvalue_equals_another_lvalue_with_no_overlap") { - ScopedFastFlag sff{"LuauIntersectionTestForEquality", true}; - CheckResult result = check(R"( local function f(a: string, b: boolean?) if a == b then diff --git a/tests/TypeInfer.tables.test.cpp b/tests/TypeInfer.tables.test.cpp index 21ac6421..468adc2c 100644 --- a/tests/TypeInfer.tables.test.cpp +++ b/tests/TypeInfer.tables.test.cpp @@ -18,7 +18,6 @@ LUAU_FASTFLAG(LuauLowerBoundsCalculation); LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution); LUAU_FASTFLAG(LuauInstantiateInSubtyping) LUAU_FASTFLAG(LuauTypeMismatchInvarianceInError) -LUAU_FASTFLAG(LuauDontExtendUnsealedRValueTables) TEST_SUITE_BEGIN("TableTests"); @@ -913,10 +912,7 @@ TEST_CASE_FIXTURE(Fixture, "disallow_indexing_into_an_unsealed_table_with_no_ind local k1 = getConstant("key1") )"); - if (FFlag::LuauDontExtendUnsealedRValueTables) - CHECK("any" == toString(requireType("k1"))); - else - CHECK("a" == toString(requireType("k1"))); + CHECK("any" == toString(requireType("k1"))); LUAU_REQUIRE_NO_ERRORS(result); } @@ -3542,8 +3538,6 @@ _ = {_,} TEST_CASE_FIXTURE(Fixture, "when_augmenting_an_unsealed_table_with_an_indexer_apply_the_correct_scope_to_the_indexer_type") { - ScopedFastFlag sff{"LuauDontExtendUnsealedRValueTables", true}; - CheckResult result = check(R"( local events = {} local mockObserveEvent = function(_, key, callback) @@ -3572,8 +3566,6 @@ TEST_CASE_FIXTURE(Fixture, "when_augmenting_an_unsealed_table_with_an_indexer_ap TEST_CASE_FIXTURE(Fixture, "dont_extend_unsealed_tables_in_rvalue_position") { - ScopedFastFlag sff{"LuauDontExtendUnsealedRValueTables", true}; - CheckResult result = check(R"( local testDictionary = { FruitName = "Lemon", diff --git a/tests/TypeInfer.test.cpp b/tests/TypeInfer.test.cpp index 7c4bfb2e..7e317f2e 100644 --- a/tests/TypeInfer.test.cpp +++ b/tests/TypeInfer.test.cpp @@ -1194,7 +1194,6 @@ TEST_CASE_FIXTURE(Fixture, "dcr_delays_expansion_of_function_containing_blocked_ { ScopedFastFlag sff[] = { {"DebugLuauDeferredConstraintResolution", true}, - {"LuauTinyUnifyNormalsFix", true}, // If we run this with error-suppression, it triggers an assertion. 
// FATAL ERROR: Assertion failed: !"Internal error: Trying to normalize a BlockedType"
{"LuauTransitiveSubtyping", false},
diff --git a/tools/faillist.txt b/tools/faillist.txt
index 76e5972d..31fc82da 100644
--- a/tools/faillist.txt
+++ b/tools/faillist.txt
@@ -25,9 +25,6 @@ BuiltinTests.string_format_correctly_ordered_types
BuiltinTests.string_format_report_all_type_errors_at_correct_positions
BuiltinTests.string_format_tostring_specifier_type_constraint
BuiltinTests.string_format_use_correct_argument2
-BuiltinTests.table_pack
-BuiltinTests.table_pack_reduce
-BuiltinTests.table_pack_variadic
DefinitionTests.class_definition_overload_metamethods
DefinitionTests.class_definition_string_props
GenericsTests.apply_type_function_nested_generics2
@@ -114,7 +111,6 @@ TableTests.table_subtyping_with_missing_props_dont_report_multiple_errors
TableTests.table_unification_4
TableTests.used_colon_instead_of_dot
TableTests.used_dot_instead_of_colon
-ToString.named_metatable_toStringNamedFunction
ToString.toStringDetailed2
ToString.toStringErrorPack
ToString.toStringNamedFunction_generic_pack
@@ -137,6 +133,7 @@ TypeInfer.check_type_infer_recursion_count
TypeInfer.cli_50041_committing_txnlog_in_apollo_client_error
TypeInfer.dont_report_type_errors_within_an_AstExprError
TypeInfer.dont_report_type_errors_within_an_AstStatError
+TypeInfer.follow_on_new_types_in_substitution
TypeInfer.fuzz_free_table_type_change_during_index_check
TypeInfer.infer_assignment_value_types_mutable_lval
TypeInfer.no_stack_overflow_from_isoptional
diff --git a/tools/natvis/CodeGen.natvis b/tools/natvis/CodeGen.natvis
index 5ff6e143..84fb3329 100644
--- a/tools/natvis/CodeGen.natvis
+++ b/tools/natvis/CodeGen.natvis
@@ -1,45 +1,46 @@
[The natvis XML in this hunk was garbled in extraction: the angle-bracket markup was stripped, leaving only the display strings. Recoverable content: the hunk removes and re-adds the visualizers (presumably re-tagged for the X64 namespace move seen elsewhere in this diff) for RegisterX64 — noreg, rip; byte registers al, cl, dl, bl; dword registers eax, ecx, edx, ebx, esp, ebp, esi, edi and e{(int)index,d}d; qword registers rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi and r{(int)index,d}; xmm{(int)index,d}; ymm{(int)index,d} — and for OperandX64 — {base}, {memSize,en} ptr[{base} + {index}*{(int)scale,d} + {imm}], {memSize,en} ptr[{index}*{(int)scale,d} + {imm}], {memSize,en} ptr[{base} + {imm}], {memSize,en} ptr[{imm}], {imm} — with one additional {memSize,en} ptr[{base} + {imm}] display form added.]