From 1212fdacbfaa576b5a1a238bc9e793f2e9156d61 Mon Sep 17 00:00:00 2001 From: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com> Date: Fri, 31 Mar 2023 21:42:49 +0300 Subject: [PATCH] Sync to upstream/release/570 (#885) Once again, all of our changes this week are for the new type solver and the JIT. In the new type solver, we fixed cyclic type alias handling and multiple stability issues. In the JIT, our main progress was for arm64, where, after lowering 36% of instructions, we are starting to see the first Luau functions executing natively. For x64, we performed code cleanup and refactoring to allow for future optimizations. --- Analysis/include/Luau/ConstraintSolver.h | 18 +- Analysis/include/Luau/Normalize.h | 6 +- Analysis/include/Luau/Scope.h | 6 +- Analysis/include/Luau/TypeInfer.h | 6 +- Analysis/src/AstQuery.cpp | 22 +- Analysis/src/Autocomplete.cpp | 51 +- Analysis/src/BuiltinDefinitions.cpp | 25 +- Analysis/src/ConstraintSolver.cpp | 121 +++-- Analysis/src/Frontend.cpp | 8 +- Analysis/src/Linter.cpp | 10 +- Analysis/src/Normalize.cpp | 81 ++- Analysis/src/Scope.cpp | 6 +- Analysis/src/TypeChecker2.cpp | 6 +- Analysis/src/TypeInfer.cpp | 53 +- Analysis/src/Unifier.cpp | 21 +- Ast/src/Lexer.cpp | 6 +- CodeGen/include/Luau/AddressA64.h | 7 +- CodeGen/include/Luau/AssemblyBuilderA64.h | 46 +- CodeGen/include/Luau/AssemblyBuilderX64.h | 1 + CodeGen/include/Luau/ConditionA64.h | 17 + CodeGen/include/Luau/IrCallWrapperX64.h | 82 +++ CodeGen/include/Luau/IrData.h | 60 ++- CodeGen/{src => include/Luau}/IrRegAllocX64.h | 9 +- CodeGen/include/Luau/IrUtils.h | 13 +- CodeGen/include/Luau/RegisterA64.h | 68 +++ CodeGen/src/AssemblyBuilderA64.cpp | 287 +++++++++- CodeGen/src/AssemblyBuilderX64.cpp | 9 +- CodeGen/src/CodeGen.cpp | 89 ++- CodeGen/src/CodeGenA64.cpp | 10 + CodeGen/src/CodeGenUtils.cpp | 87 ++- CodeGen/src/CodeGenUtils.h | 3 +- CodeGen/src/EmitBuiltinsX64.cpp | 148 ++--- CodeGen/src/EmitCommon.h | 7 + CodeGen/src/EmitCommonA64.cpp | 91 ++-- 
CodeGen/src/EmitCommonA64.h | 55 +- CodeGen/src/EmitCommonX64.cpp | 185 +++---- CodeGen/src/EmitCommonX64.h | 24 +- CodeGen/src/EmitInstructionA64.cpp | 67 ++- CodeGen/src/EmitInstructionA64.h | 4 + CodeGen/src/EmitInstructionX64.cpp | 11 +- CodeGen/src/EmitInstructionX64.h | 5 +- CodeGen/src/IrAnalysis.cpp | 26 +- CodeGen/src/IrBuilder.cpp | 21 +- CodeGen/src/IrCallWrapperX64.cpp | 400 ++++++++++++++ CodeGen/src/IrDump.cpp | 54 +- CodeGen/src/IrLoweringA64.cpp | 505 +++++++++++++++++- CodeGen/src/IrLoweringA64.h | 10 +- CodeGen/src/IrLoweringX64.cpp | 311 ++++++----- CodeGen/src/IrLoweringX64.h | 3 +- CodeGen/src/IrRegAllocA64.cpp | 174 ++++++ CodeGen/src/IrRegAllocA64.h | 55 ++ CodeGen/src/IrRegAllocX64.cpp | 68 ++- CodeGen/src/IrTranslateBuiltins.cpp | 68 ++- CodeGen/src/IrTranslation.cpp | 71 ++- CodeGen/src/IrUtils.cpp | 20 + CodeGen/src/NativeState.cpp | 2 + CodeGen/src/NativeState.h | 4 +- CodeGen/src/OptimizeConstProp.cpp | 29 +- Compiler/src/Compiler.cpp | 6 +- Sources.cmake | 8 +- VM/src/lbuiltins.cpp | 17 - fuzz/format.cpp | 1 + fuzz/linter.cpp | 15 +- fuzz/proto.cpp | 16 +- fuzz/typeck.cpp | 17 +- tests/AssemblyBuilderA64.test.cpp | 98 +++- tests/Autocomplete.test.cpp | 2 - tests/Compiler.test.cpp | 8 - tests/Conformance.test.cpp | 2 +- tests/ConstraintGraphBuilderFixture.cpp | 3 +- tests/IrBuilder.test.cpp | 278 +++++----- tests/IrCallWrapperX64.test.cpp | 484 +++++++++++++++++ tests/Lexer.test.cpp | 2 - tests/Linter.test.cpp | 4 - tests/Normalize.test.cpp | 1 - tests/Parser.test.cpp | 2 - tests/TypeInfer.aliases.test.cpp | 30 ++ tests/TypeInfer.functions.test.cpp | 3 - tests/TypeInfer.loops.test.cpp | 22 + tests/TypeInfer.oop.test.cpp | 25 + tests/TypeInfer.operators.test.cpp | 6 - tests/TypeInfer.provisional.test.cpp | 2 - tests/TypeInfer.tables.test.cpp | 10 +- tests/TypeInfer.test.cpp | 1 - tools/faillist.txt | 5 +- tools/natvis/CodeGen.natvis | 57 +- 86 files changed, 3551 insertions(+), 1226 deletions(-) create mode 100644 
CodeGen/include/Luau/IrCallWrapperX64.h rename CodeGen/{src => include/Luau}/IrRegAllocX64.h (85%) create mode 100644 CodeGen/src/IrCallWrapperX64.cpp create mode 100644 CodeGen/src/IrRegAllocA64.cpp create mode 100644 CodeGen/src/IrRegAllocA64.h create mode 100644 tests/IrCallWrapperX64.test.cpp diff --git a/Analysis/include/Luau/ConstraintSolver.h b/Analysis/include/Luau/ConstraintSolver.h index e9e1e884..2feee236 100644 --- a/Analysis/include/Luau/ConstraintSolver.h +++ b/Analysis/include/Luau/ConstraintSolver.h @@ -53,7 +53,6 @@ struct ConstraintSolver NotNull builtinTypes; InternalErrorReporter iceReporter; NotNull normalizer; - NotNull reducer; // The entire set of constraints that the solver is trying to resolve. std::vector> constraints; NotNull rootScope; @@ -85,8 +84,7 @@ struct ConstraintSolver DcrLogger* logger; explicit ConstraintSolver(NotNull normalizer, NotNull rootScope, std::vector> constraints, - ModuleName moduleName, NotNull reducer, NotNull moduleResolver, std::vector requireCycles, - DcrLogger* logger); + ModuleName moduleName, NotNull moduleResolver, std::vector requireCycles, DcrLogger* logger); // Randomize the order in which to dispatch constraints void randomize(unsigned seed); @@ -219,6 +217,20 @@ struct ConstraintSolver void reportError(TypeError e); private: + + /** Helper used by tryDispatch(SubtypeConstraint) and + * tryDispatch(PackSubtypeConstraint) + * + * Attempts to unify subTy with superTy. If doing so would require unifying + * BlockedTypes, fail and block the constraint on those BlockedTypes. + * + * If unification fails, replace all free types with errorType. + * + * If unification succeeds, unblock every type changed by the unification. + */ + template + bool tryUnify(NotNull constraint, TID subTy, TID superTy); + /** * Marks a constraint as being blocked on a type or type pack. 
The constraint * solver will not attempt to dispatch blocked constraints until their diff --git a/Analysis/include/Luau/Normalize.h b/Analysis/include/Luau/Normalize.h index 15404707..efcb5108 100644 --- a/Analysis/include/Luau/Normalize.h +++ b/Analysis/include/Luau/Normalize.h @@ -191,12 +191,8 @@ struct NormalizedClassType // this type may contain `error`. struct NormalizedFunctionType { - NormalizedFunctionType(); - bool isTop = false; - // TODO: Remove this wrapping optional when clipping - // FFlagLuauNegatedFunctionTypes. - std::optional parts; + TypeIds parts; void resetToNever(); void resetToTop(); diff --git a/Analysis/include/Luau/Scope.h b/Analysis/include/Luau/Scope.h index 745ea47a..c3038fac 100644 --- a/Analysis/include/Luau/Scope.h +++ b/Analysis/include/Luau/Scope.h @@ -55,11 +55,11 @@ struct Scope std::optional lookup(DefId def) const; std::optional> lookupEx(Symbol sym); - std::optional lookupType(const Name& name); - std::optional lookupImportedType(const Name& moduleAlias, const Name& name); + std::optional lookupType(const Name& name) const; + std::optional lookupImportedType(const Name& moduleAlias, const Name& name) const; std::unordered_map privateTypePackBindings; - std::optional lookupPack(const Name& name); + std::optional lookupPack(const Name& name) const; // WARNING: This function linearly scans for a string key of equal value! It is thus O(n**2) std::optional linearSearchForBinding(const std::string& name, bool traverseScopeChain = true) const; diff --git a/Analysis/include/Luau/TypeInfer.h b/Analysis/include/Luau/TypeInfer.h index 68161794..7dae79c3 100644 --- a/Analysis/include/Luau/TypeInfer.h +++ b/Analysis/include/Luau/TypeInfer.h @@ -79,7 +79,8 @@ struct GlobalTypes // within a program are borrowed pointers into this set. 
struct TypeChecker { - explicit TypeChecker(const GlobalTypes& globals, ModuleResolver* resolver, NotNull builtinTypes, InternalErrorReporter* iceHandler); + explicit TypeChecker( + const ScopePtr& globalScope, ModuleResolver* resolver, NotNull builtinTypes, InternalErrorReporter* iceHandler); TypeChecker(const TypeChecker&) = delete; TypeChecker& operator=(const TypeChecker&) = delete; @@ -367,8 +368,7 @@ public: */ std::vector unTypePack(const ScopePtr& scope, TypePackId pack, size_t expectedLength, const Location& location); - // TODO: only const version of global scope should be available to make sure nothing else is modified inside of from users of TypeChecker - const GlobalTypes& globals; + const ScopePtr& globalScope; ModuleResolver* resolver; ModulePtr currentModule; diff --git a/Analysis/src/AstQuery.cpp b/Analysis/src/AstQuery.cpp index b0c3750b..dc07a35c 100644 --- a/Analysis/src/AstQuery.cpp +++ b/Analysis/src/AstQuery.cpp @@ -11,8 +11,6 @@ #include -LUAU_FASTFLAG(LuauCompleteTableKeysBetter); - namespace Luau { @@ -31,24 +29,12 @@ struct AutocompleteNodeFinder : public AstVisitor bool visit(AstExpr* expr) override { - if (FFlag::LuauCompleteTableKeysBetter) + if (expr->location.begin <= pos && pos <= expr->location.end) { - if (expr->location.begin <= pos && pos <= expr->location.end) - { - ancestry.push_back(expr); - return true; - } - return false; - } - else - { - if (expr->location.begin < pos && pos <= expr->location.end) - { - ancestry.push_back(expr); - return true; - } - return false; + ancestry.push_back(expr); + return true; } + return false; } bool visit(AstStat* stat) override diff --git a/Analysis/src/Autocomplete.cpp b/Analysis/src/Autocomplete.cpp index 1df4d3d7..3fdd9319 100644 --- a/Analysis/src/Autocomplete.cpp +++ b/Analysis/src/Autocomplete.cpp @@ -13,7 +13,6 @@ #include #include -LUAU_FASTFLAGVARIABLE(LuauCompleteTableKeysBetter, false); LUAU_FASTFLAGVARIABLE(LuauAutocompleteSkipNormalization, false); static const 
std::unordered_set kStatementStartingKeywords = { @@ -981,25 +980,14 @@ T* extractStat(const std::vector& ancestry) AstNode* grandParent = ancestry.size() >= 3 ? ancestry.rbegin()[2] : nullptr; AstNode* greatGrandParent = ancestry.size() >= 4 ? ancestry.rbegin()[3] : nullptr; - if (FFlag::LuauCompleteTableKeysBetter) - { - if (!grandParent) - return nullptr; + if (!grandParent) + return nullptr; - if (T* t = parent->as(); t && grandParent->is()) - return t; + if (T* t = parent->as(); t && grandParent->is()) + return t; - if (!greatGrandParent) - return nullptr; - } - else - { - if (T* t = parent->as(); t && parent->is()) - return t; - - if (!grandParent || !greatGrandParent) - return nullptr; - } + if (!greatGrandParent) + return nullptr; if (T* t = greatGrandParent->as(); t && grandParent->is() && parent->is() && isIdentifier(node)) return t; @@ -1533,23 +1521,20 @@ static AutocompleteResult autocomplete(const SourceModule& sourceModule, const M { auto result = autocompleteProps(*module, typeArena, builtinTypes, *it, PropIndexType::Key, ancestry); - if (FFlag::LuauCompleteTableKeysBetter) - { - if (auto nodeIt = module->astExpectedTypes.find(node->asExpr())) - autocompleteStringSingleton(*nodeIt, !node->is(), result); + if (auto nodeIt = module->astExpectedTypes.find(node->asExpr())) + autocompleteStringSingleton(*nodeIt, !node->is(), result); - if (!key) + if (!key) + { + // If there is "no key," it may be that the user + // intends for the current token to be the key, but + // has yet to type the `=` sign. + // + // If the key type is a union of singleton strings, + // suggest those too. + if (auto ttv = get(follow(*it)); ttv && ttv->indexer) { - // If there is "no key," it may be that the user - // intends for the current token to be the key, but - // has yet to type the `=` sign. - // - // If the key type is a union of singleton strings, - // suggest those too. 
- if (auto ttv = get(follow(*it)); ttv && ttv->indexer) - { - autocompleteStringSingleton(ttv->indexer->indexType, false, result); - } + autocompleteStringSingleton(ttv->indexer->indexType, false, result); } } diff --git a/Analysis/src/BuiltinDefinitions.cpp b/Analysis/src/BuiltinDefinitions.cpp index d2ace49b..2108b160 100644 --- a/Analysis/src/BuiltinDefinitions.cpp +++ b/Analysis/src/BuiltinDefinitions.cpp @@ -15,8 +15,6 @@ #include -LUAU_FASTFLAGVARIABLE(LuauDeprecateTableGetnForeach, false) - /** FIXME: Many of these type definitions are not quite completely accurate. * * Some of them require richer generics than we have. For instance, we do not yet have a way to talk @@ -298,13 +296,10 @@ void registerBuiltinGlobals(TypeChecker& typeChecker, GlobalTypes& globals) ttv->props["freeze"] = makeProperty(makeFunction(arena, std::nullopt, {tabTy}, {tabTy}), "@luau/global/table.freeze"); ttv->props["clone"] = makeProperty(makeFunction(arena, std::nullopt, {tabTy}, {tabTy}), "@luau/global/table.clone"); - if (FFlag::LuauDeprecateTableGetnForeach) - { - ttv->props["getn"].deprecated = true; - ttv->props["getn"].deprecatedSuggestion = "#"; - ttv->props["foreach"].deprecated = true; - ttv->props["foreachi"].deprecated = true; - } + ttv->props["getn"].deprecated = true; + ttv->props["getn"].deprecatedSuggestion = "#"; + ttv->props["foreach"].deprecated = true; + ttv->props["foreachi"].deprecated = true; attachMagicFunction(ttv->props["pack"].type, magicFunctionPack); attachDcrMagicFunction(ttv->props["pack"].type, dcrMagicFunctionPack); @@ -401,15 +396,13 @@ void registerBuiltinGlobals(Frontend& frontend) ttv->props["freeze"] = makeProperty(makeFunction(arena, std::nullopt, {tabTy}, {tabTy}), "@luau/global/table.freeze"); ttv->props["clone"] = makeProperty(makeFunction(arena, std::nullopt, {tabTy}, {tabTy}), "@luau/global/table.clone"); - if (FFlag::LuauDeprecateTableGetnForeach) - { - ttv->props["getn"].deprecated = true; - ttv->props["getn"].deprecatedSuggestion = "#"; 
- ttv->props["foreach"].deprecated = true; - ttv->props["foreachi"].deprecated = true; - } + ttv->props["getn"].deprecated = true; + ttv->props["getn"].deprecatedSuggestion = "#"; + ttv->props["foreach"].deprecated = true; + ttv->props["foreachi"].deprecated = true; attachMagicFunction(ttv->props["pack"].type, magicFunctionPack); + attachDcrMagicFunction(ttv->props["pack"].type, dcrMagicFunctionPack); } attachMagicFunction(getGlobalBinding(globals, "require"), magicFunctionRequire); diff --git a/Analysis/src/ConstraintSolver.cpp b/Analysis/src/ConstraintSolver.cpp index d5853932..d2bed2da 100644 --- a/Analysis/src/ConstraintSolver.cpp +++ b/Analysis/src/ConstraintSolver.cpp @@ -226,12 +226,10 @@ void dump(ConstraintSolver* cs, ToStringOptions& opts) } ConstraintSolver::ConstraintSolver(NotNull normalizer, NotNull rootScope, std::vector> constraints, - ModuleName moduleName, NotNull reducer, NotNull moduleResolver, std::vector requireCycles, - DcrLogger* logger) + ModuleName moduleName, NotNull moduleResolver, std::vector requireCycles, DcrLogger* logger) : arena(normalizer->arena) , builtinTypes(normalizer->builtinTypes) , normalizer(normalizer) - , reducer(reducer) , constraints(std::move(constraints)) , rootScope(rootScope) , currentModuleName(std::move(moduleName)) @@ -458,40 +456,7 @@ bool ConstraintSolver::tryDispatch(const SubtypeConstraint& c, NotNullscope, Location{}, Covariant}; - u.useScopes = true; - - u.tryUnify(c.subType, c.superType); - - if (!u.blockedTypes.empty() || !u.blockedTypePacks.empty()) - { - for (TypeId bt : u.blockedTypes) - block(bt, constraint); - for (TypePackId btp : u.blockedTypePacks) - block(btp, constraint); - return false; - } - - if (const auto& e = hasUnificationTooComplex(u.errors)) - reportError(*e); - - if (!u.errors.empty()) - { - TypeId errorType = errorRecoveryType(); - u.tryUnify(c.subType, errorType); - u.tryUnify(c.superType, errorType); - } - - const auto [changedTypes, changedPacks] = u.log.getChanges(); - - 
u.log.commit(); - - unblock(changedTypes); - unblock(changedPacks); - - // unify(c.subType, c.superType, constraint->scope); - - return true; + return tryUnify(constraint, c.subType, c.superType); } bool ConstraintSolver::tryDispatch(const PackSubtypeConstraint& c, NotNull constraint, bool force) @@ -501,9 +466,7 @@ bool ConstraintSolver::tryDispatch(const PackSubtypeConstraint& c, NotNullscope); - - return true; + return tryUnify(constraint, c.subPack, c.superPack); } bool ConstraintSolver::tryDispatch(const GeneralizationConstraint& c, NotNull constraint, bool force) @@ -1117,7 +1080,7 @@ bool ConstraintSolver::tryDispatch(const TypeAliasExpansionConstraint& c, NotNul InstantiationQueuer queuer{constraint->scope, constraint->location, this}; queuer.traverse(target); - if (target->persistent) + if (target->persistent || target->owningArena != arena) { bindResult(target); return true; @@ -1335,8 +1298,6 @@ bool ConstraintSolver::tryDispatch(const HasPropConstraint& c, NotNullreduce(subjectType).value_or(subjectType); - auto [blocked, result] = lookupTableProp(subjectType, c.prop); if (!blocked.empty()) { @@ -1716,8 +1677,15 @@ bool ConstraintSolver::tryDispatchIterableTable(TypeId iteratorTy, const Iterabl if (auto iteratorTable = get(iteratorTy)) { - if (iteratorTable->state == TableState::Free) - return block_(iteratorTy); + /* + * We try not to dispatch IterableConstraints over free tables because + * it's possible that there are other constraints on the table that will + * clarify what we should do. + * + * We should eventually introduce a type family to talk about iteration. 
+ */ + if (iteratorTable->state == TableState::Free && !force) + return block(iteratorTy, constraint); if (iteratorTable->indexer) { @@ -1957,14 +1925,14 @@ std::pair, std::optional> ConstraintSolver::lookupTa else if (auto utv = get(subjectType)) { std::vector blocked; - std::vector options; + std::set options; for (TypeId ty : utv) { auto [innerBlocked, innerResult] = lookupTableProp(ty, propName, seen); blocked.insert(blocked.end(), innerBlocked.begin(), innerBlocked.end()); if (innerResult) - options.push_back(*innerResult); + options.insert(*innerResult); } if (!blocked.empty()) @@ -1973,21 +1941,21 @@ std::pair, std::optional> ConstraintSolver::lookupTa if (options.empty()) return {{}, std::nullopt}; else if (options.size() == 1) - return {{}, options[0]}; + return {{}, *begin(options)}; else - return {{}, arena->addType(UnionType{std::move(options)})}; + return {{}, arena->addType(UnionType{std::vector(begin(options), end(options))})}; } else if (auto itv = get(subjectType)) { std::vector blocked; - std::vector options; + std::set options; for (TypeId ty : itv) { auto [innerBlocked, innerResult] = lookupTableProp(ty, propName, seen); blocked.insert(blocked.end(), innerBlocked.begin(), innerBlocked.end()); if (innerResult) - options.push_back(*innerResult); + options.insert(*innerResult); } if (!blocked.empty()) @@ -1996,14 +1964,61 @@ std::pair, std::optional> ConstraintSolver::lookupTa if (options.empty()) return {{}, std::nullopt}; else if (options.size() == 1) - return {{}, options[0]}; + return {{}, *begin(options)}; else - return {{}, arena->addType(IntersectionType{std::move(options)})}; + return {{}, arena->addType(IntersectionType{std::vector(begin(options), end(options))})}; } return {{}, std::nullopt}; } +static TypeId getErrorType(NotNull builtinTypes, TypeId) +{ + return builtinTypes->errorRecoveryType(); +} + +static TypePackId getErrorType(NotNull builtinTypes, TypePackId) +{ + return builtinTypes->errorRecoveryTypePack(); +} + +template +bool 
ConstraintSolver::tryUnify(NotNull constraint, TID subTy, TID superTy) +{ + Unifier u{normalizer, Mode::Strict, constraint->scope, Location{}, Covariant}; + u.useScopes = true; + + u.tryUnify(subTy, superTy); + + if (!u.blockedTypes.empty() || !u.blockedTypePacks.empty()) + { + for (TypeId bt : u.blockedTypes) + block(bt, constraint); + for (TypePackId btp : u.blockedTypePacks) + block(btp, constraint); + return false; + } + + if (const auto& e = hasUnificationTooComplex(u.errors)) + reportError(*e); + + if (!u.errors.empty()) + { + TID errorType = getErrorType(builtinTypes, TID{}); + u.tryUnify(subTy, errorType); + u.tryUnify(superTy, errorType); + } + + const auto [changedTypes, changedPacks] = u.log.getChanges(); + + u.log.commit(); + + unblock(changedTypes); + unblock(changedPacks); + + return true; +} + void ConstraintSolver::block_(BlockedConstraintId target, NotNull constraint) { blocked[target].push_back(constraint); diff --git a/Analysis/src/Frontend.cpp b/Analysis/src/Frontend.cpp index a50933b7..191e94f4 100644 --- a/Analysis/src/Frontend.cpp +++ b/Analysis/src/Frontend.cpp @@ -435,8 +435,8 @@ Frontend::Frontend(FileResolver* fileResolver, ConfigResolver* configResolver, c , moduleResolverForAutocomplete(this) , globals(builtinTypes) , globalsForAutocomplete(builtinTypes) - , typeChecker(globals, &moduleResolver, builtinTypes, &iceHandler) - , typeCheckerForAutocomplete(globalsForAutocomplete, &moduleResolverForAutocomplete, builtinTypes, &iceHandler) + , typeChecker(globals.globalScope, &moduleResolver, builtinTypes, &iceHandler) + , typeCheckerForAutocomplete(globalsForAutocomplete.globalScope, &moduleResolverForAutocomplete, builtinTypes, &iceHandler) , configResolver(configResolver) , options(options) { @@ -970,8 +970,8 @@ ModulePtr check(const SourceModule& sourceModule, const std::vectorerrors = std::move(cgb.errors); - ConstraintSolver cs{NotNull{&normalizer}, NotNull(cgb.rootScope), borrowConstraints(cgb.constraints), sourceModule.name, - 
NotNull{result->reduction.get()}, moduleResolver, requireCycles, logger.get()}; + ConstraintSolver cs{NotNull{&normalizer}, NotNull(cgb.rootScope), borrowConstraints(cgb.constraints), sourceModule.name, moduleResolver, + requireCycles, logger.get()}; if (options.randomizeConstraintResolutionSeed) cs.randomize(*options.randomizeConstraintResolutionSeed); diff --git a/Analysis/src/Linter.cpp b/Analysis/src/Linter.cpp index f850bd3d..d6aafda6 100644 --- a/Analysis/src/Linter.cpp +++ b/Analysis/src/Linter.cpp @@ -14,8 +14,6 @@ LUAU_FASTINTVARIABLE(LuauSuggestionDistance, 4) -LUAU_FASTFLAGVARIABLE(LuauImproveDeprecatedApiLint, false) - namespace Luau { @@ -2102,9 +2100,6 @@ class LintDeprecatedApi : AstVisitor public: LUAU_NOINLINE static void process(LintContext& context) { - if (!FFlag::LuauImproveDeprecatedApiLint && !context.module) - return; - LintDeprecatedApi pass{&context}; context.root->visit(&pass); } @@ -2122,8 +2117,7 @@ private: if (std::optional ty = context->getType(node->expr)) check(node, follow(*ty)); else if (AstExprGlobal* global = node->expr->as()) - if (FFlag::LuauImproveDeprecatedApiLint) - check(node->location, global->name, node->index); + check(node->location, global->name, node->index); return true; } @@ -2144,7 +2138,7 @@ private: if (prop != tty->props.end() && prop->second.deprecated) { // strip synthetic typeof() for builtin tables - if (FFlag::LuauImproveDeprecatedApiLint && tty->name && tty->name->compare(0, 7, "typeof(") == 0 && tty->name->back() == ')') + if (tty->name && tty->name->compare(0, 7, "typeof(") == 0 && tty->name->back() == ')') report(node->location, prop->second, tty->name->substr(7, tty->name->length() - 8).c_str(), node->index.value); else report(node->location, prop->second, tty->name ? 
tty->name->c_str() : nullptr, node->index.value); diff --git a/Analysis/src/Normalize.cpp b/Analysis/src/Normalize.cpp index f383f5ea..7c56a4b8 100644 --- a/Analysis/src/Normalize.cpp +++ b/Analysis/src/Normalize.cpp @@ -18,7 +18,6 @@ LUAU_FASTFLAGVARIABLE(DebugLuauCheckNormalizeInvariant, false) LUAU_FASTINTVARIABLE(LuauNormalizeIterationLimit, 1200); LUAU_FASTINTVARIABLE(LuauNormalizeCacheLimit, 100000); LUAU_FASTFLAGVARIABLE(LuauNegatedClassTypes, false); -LUAU_FASTFLAGVARIABLE(LuauNegatedFunctionTypes, false); LUAU_FASTFLAGVARIABLE(LuauNegatedTableTypes, false); LUAU_FASTFLAGVARIABLE(LuauNormalizeBlockedTypes, false); LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution) @@ -202,26 +201,21 @@ bool NormalizedClassType::isNever() const return classes.empty(); } -NormalizedFunctionType::NormalizedFunctionType() - : parts(FFlag::LuauNegatedFunctionTypes ? std::optional{TypeIds{}} : std::nullopt) -{ -} - void NormalizedFunctionType::resetToTop() { isTop = true; - parts.emplace(); + parts.clear(); } void NormalizedFunctionType::resetToNever() { isTop = false; - parts.emplace(); + parts.clear(); } bool NormalizedFunctionType::isNever() const { - return !isTop && (!parts || parts->empty()); + return !isTop && parts.empty(); } NormalizedType::NormalizedType(NotNull builtinTypes) @@ -438,13 +432,10 @@ static bool isNormalizedThread(TypeId ty) static bool areNormalizedFunctions(const NormalizedFunctionType& tys) { - if (tys.parts) + for (TypeId ty : tys.parts) { - for (TypeId ty : *tys.parts) - { - if (!get(ty) && !get(ty)) - return false; - } + if (!get(ty) && !get(ty)) + return false; } return true; } @@ -1170,13 +1161,10 @@ std::optional Normalizer::unionOfFunctions(TypeId here, TypeId there) void Normalizer::unionFunctions(NormalizedFunctionType& heres, const NormalizedFunctionType& theres) { - if (FFlag::LuauNegatedFunctionTypes) - { - if (heres.isTop) - return; - if (theres.isTop) - heres.resetToTop(); - } + if (heres.isTop) + return; + if (theres.isTop) + 
heres.resetToTop(); if (theres.isNever()) return; @@ -1185,13 +1173,13 @@ void Normalizer::unionFunctions(NormalizedFunctionType& heres, const NormalizedF if (heres.isNever()) { - tmps.insert(theres.parts->begin(), theres.parts->end()); + tmps.insert(theres.parts.begin(), theres.parts.end()); heres.parts = std::move(tmps); return; } - for (TypeId here : *heres.parts) - for (TypeId there : *theres.parts) + for (TypeId here : heres.parts) + for (TypeId there : theres.parts) { if (std::optional fun = unionOfFunctions(here, there)) tmps.insert(*fun); @@ -1213,7 +1201,7 @@ void Normalizer::unionFunctionsWithFunction(NormalizedFunctionType& heres, TypeI } TypeIds tmps; - for (TypeId here : *heres.parts) + for (TypeId here : heres.parts) { if (std::optional fun = unionOfFunctions(here, there)) tmps.insert(*fun); @@ -1420,7 +1408,6 @@ bool Normalizer::unionNormalWithTy(NormalizedType& here, TypeId there, int ignor here.threads = there; else if (ptv->type == PrimitiveType::Function) { - LUAU_ASSERT(FFlag::LuauNegatedFunctionTypes); here.functions.resetToTop(); } else if (ptv->type == PrimitiveType::Table && FFlag::LuauNegatedTableTypes) @@ -1553,15 +1540,12 @@ std::optional Normalizer::negateNormal(const NormalizedType& her * arbitrary function types. Ordinary code can never form these kinds of * types, so we decline to negate them. 
*/ - if (FFlag::LuauNegatedFunctionTypes) - { - if (here.functions.isNever()) - result.functions.resetToTop(); - else if (here.functions.isTop) - result.functions.resetToNever(); - else - return std::nullopt; - } + if (here.functions.isNever()) + result.functions.resetToTop(); + else if (here.functions.isTop) + result.functions.resetToNever(); + else + return std::nullopt; /* * It is not possible to negate an arbitrary table type, because function @@ -2390,15 +2374,15 @@ void Normalizer::intersectFunctionsWithFunction(NormalizedFunctionType& heres, T heres.isTop = false; - for (auto it = heres.parts->begin(); it != heres.parts->end();) + for (auto it = heres.parts.begin(); it != heres.parts.end();) { TypeId here = *it; if (get(here)) it++; else if (std::optional tmp = intersectionOfFunctions(here, there)) { - heres.parts->erase(it); - heres.parts->insert(*tmp); + heres.parts.erase(it); + heres.parts.insert(*tmp); return; } else @@ -2406,13 +2390,13 @@ void Normalizer::intersectFunctionsWithFunction(NormalizedFunctionType& heres, T } TypeIds tmps; - for (TypeId here : *heres.parts) + for (TypeId here : heres.parts) { if (std::optional tmp = unionSaturatedFunctions(here, there)) tmps.insert(*tmp); } - heres.parts->insert(there); - heres.parts->insert(tmps.begin(), tmps.end()); + heres.parts.insert(there); + heres.parts.insert(tmps.begin(), tmps.end()); } void Normalizer::intersectFunctions(NormalizedFunctionType& heres, const NormalizedFunctionType& theres) @@ -2426,7 +2410,7 @@ void Normalizer::intersectFunctions(NormalizedFunctionType& heres, const Normali } else { - for (TypeId there : *theres.parts) + for (TypeId there : theres.parts) intersectFunctionsWithFunction(heres, there); } } @@ -2621,10 +2605,7 @@ bool Normalizer::intersectNormalWithTy(NormalizedType& here, TypeId there) else if (ptv->type == PrimitiveType::Thread) here.threads = threads; else if (ptv->type == PrimitiveType::Function) - { - LUAU_ASSERT(FFlag::LuauNegatedFunctionTypes); here.functions = 
std::move(functions); - } else if (ptv->type == PrimitiveType::Table) { LUAU_ASSERT(FFlag::LuauNegatedTableTypes); @@ -2768,16 +2749,16 @@ TypeId Normalizer::typeFromNormal(const NormalizedType& norm) if (!get(norm.errors)) result.push_back(norm.errors); - if (FFlag::LuauNegatedFunctionTypes && norm.functions.isTop) + if (norm.functions.isTop) result.push_back(builtinTypes->functionType); else if (!norm.functions.isNever()) { - if (norm.functions.parts->size() == 1) - result.push_back(*norm.functions.parts->begin()); + if (norm.functions.parts.size() == 1) + result.push_back(*norm.functions.parts.begin()); else { std::vector parts; - parts.insert(parts.end(), norm.functions.parts->begin(), norm.functions.parts->end()); + parts.insert(parts.end(), norm.functions.parts.begin(), norm.functions.parts.end()); result.push_back(arena->addType(IntersectionType{std::move(parts)})); } } diff --git a/Analysis/src/Scope.cpp b/Analysis/src/Scope.cpp index f54ebe2a..2de381be 100644 --- a/Analysis/src/Scope.cpp +++ b/Analysis/src/Scope.cpp @@ -65,7 +65,7 @@ std::optional Scope::lookup(DefId def) const return std::nullopt; } -std::optional Scope::lookupType(const Name& name) +std::optional Scope::lookupType(const Name& name) const { const Scope* scope = this; while (true) @@ -85,7 +85,7 @@ std::optional Scope::lookupType(const Name& name) } } -std::optional Scope::lookupImportedType(const Name& moduleAlias, const Name& name) +std::optional Scope::lookupImportedType(const Name& moduleAlias, const Name& name) const { const Scope* scope = this; while (scope) @@ -110,7 +110,7 @@ std::optional Scope::lookupImportedType(const Name& moduleAlias, const return std::nullopt; } -std::optional Scope::lookupPack(const Name& name) +std::optional Scope::lookupPack(const Name& name) const { const Scope* scope = this; while (true) diff --git a/Analysis/src/TypeChecker2.cpp b/Analysis/src/TypeChecker2.cpp index ec71a583..c7d30f43 100644 --- a/Analysis/src/TypeChecker2.cpp +++ 
b/Analysis/src/TypeChecker2.cpp @@ -2075,12 +2075,12 @@ struct TypeChecker2 fetch(builtinTypes->functionType); else if (!norm.functions.isNever()) { - if (norm.functions.parts->size() == 1) - fetch(norm.functions.parts->front()); + if (norm.functions.parts.size() == 1) + fetch(norm.functions.parts.front()); else { std::vector parts; - parts.insert(parts.end(), norm.functions.parts->begin(), norm.functions.parts->end()); + parts.insert(parts.end(), norm.functions.parts.begin(), norm.functions.parts.end()); fetch(testArena.addType(IntersectionType{std::move(parts)})); } } diff --git a/Analysis/src/TypeInfer.cpp b/Analysis/src/TypeInfer.cpp index 48ff6a20..f4781558 100644 --- a/Analysis/src/TypeInfer.cpp +++ b/Analysis/src/TypeInfer.cpp @@ -26,7 +26,6 @@ #include LUAU_FASTFLAGVARIABLE(DebugLuauMagicTypes, false) -LUAU_FASTFLAGVARIABLE(LuauDontExtendUnsealedRValueTables, false) LUAU_FASTINTVARIABLE(LuauTypeInferRecursionLimit, 165) LUAU_FASTINTVARIABLE(LuauTypeInferIterationLimit, 20000) LUAU_FASTINTVARIABLE(LuauTypeInferTypePackLoopLimit, 5000) @@ -38,7 +37,6 @@ LUAU_FASTFLAGVARIABLE(LuauReturnAnyInsteadOfICE, false) // Eventually removed as LUAU_FASTFLAGVARIABLE(DebugLuauSharedSelf, false) LUAU_FASTFLAGVARIABLE(LuauTryhardAnd, false) LUAU_FASTFLAG(LuauInstantiateInSubtyping) -LUAU_FASTFLAGVARIABLE(LuauIntersectionTestForEquality, false) LUAU_FASTFLAG(LuauNegatedClassTypes) LUAU_FASTFLAGVARIABLE(LuauAllowIndexClassParameters, false) LUAU_FASTFLAG(LuauUninhabitedSubAnything2) @@ -228,8 +226,8 @@ GlobalTypes::GlobalTypes(NotNull builtinTypes) globalScope->addBuiltinTypeBinding("never", TypeFun{{}, builtinTypes->neverType}); } -TypeChecker::TypeChecker(const GlobalTypes& globals, ModuleResolver* resolver, NotNull builtinTypes, InternalErrorReporter* iceHandler) - : globals(globals) +TypeChecker::TypeChecker(const ScopePtr& globalScope, ModuleResolver* resolver, NotNull builtinTypes, InternalErrorReporter* iceHandler) + : globalScope(globalScope) , resolver(resolver) , 
builtinTypes(builtinTypes) , iceHandler(iceHandler) @@ -280,7 +278,7 @@ ModulePtr TypeChecker::checkWithoutRecursionCheck(const SourceModule& module, Mo unifierState.counters.recursionLimit = FInt::LuauTypeInferRecursionLimit; unifierState.counters.iterationLimit = unifierIterationLimit ? *unifierIterationLimit : FInt::LuauTypeInferIterationLimit; - ScopePtr parentScope = environmentScope.value_or(globals.globalScope); + ScopePtr parentScope = environmentScope.value_or(globalScope); ScopePtr moduleScope = std::make_shared(parentScope); if (module.cyclic) @@ -1656,7 +1654,7 @@ void TypeChecker::prototype(const ScopePtr& scope, const AstStatTypeAlias& typea } else { - if (globals.globalScope->builtinTypeNames.contains(name)) + if (globalScope->builtinTypeNames.contains(name)) { reportError(typealias.location, DuplicateTypeDefinition{name}); duplicateTypeAliases.insert({typealias.exported, name}); @@ -2690,7 +2688,7 @@ TypeId TypeChecker::checkRelationalOperation( if (get(lhsType) || get(rhsType)) return booleanType; - if (FFlag::LuauIntersectionTestForEquality && isEquality) + if (isEquality) { // Unless either type is free or any, an equality comparison is only // valid when the intersection of the two operands is non-empty. 
@@ -3261,16 +3259,7 @@ TypeId TypeChecker::checkLValueBinding(const ScopePtr& scope, const AstExprIndex { return it->second.type; } - else if (!FFlag::LuauDontExtendUnsealedRValueTables && (lhsTable->state == TableState::Unsealed || lhsTable->state == TableState::Free)) - { - TypeId theType = freshType(scope); - Property& property = lhsTable->props[name]; - property.type = theType; - property.location = expr.indexLocation; - return theType; - } - else if (FFlag::LuauDontExtendUnsealedRValueTables && - ((ctx == ValueContext::LValue && lhsTable->state == TableState::Unsealed) || lhsTable->state == TableState::Free)) + else if ((ctx == ValueContext::LValue && lhsTable->state == TableState::Unsealed) || lhsTable->state == TableState::Free) { TypeId theType = freshType(scope); Property& property = lhsTable->props[name]; @@ -3391,16 +3380,7 @@ TypeId TypeChecker::checkLValueBinding(const ScopePtr& scope, const AstExprIndex { return it->second.type; } - else if (!FFlag::LuauDontExtendUnsealedRValueTables && (exprTable->state == TableState::Unsealed || exprTable->state == TableState::Free)) - { - TypeId resultType = freshType(scope); - Property& property = exprTable->props[value->value.data]; - property.type = resultType; - property.location = expr.index->location; - return resultType; - } - else if (FFlag::LuauDontExtendUnsealedRValueTables && - ((ctx == ValueContext::LValue && exprTable->state == TableState::Unsealed) || exprTable->state == TableState::Free)) + else if ((ctx == ValueContext::LValue && exprTable->state == TableState::Unsealed) || exprTable->state == TableState::Free) { TypeId resultType = freshType(scope); Property& property = exprTable->props[value->value.data]; @@ -3416,14 +3396,7 @@ TypeId TypeChecker::checkLValueBinding(const ScopePtr& scope, const AstExprIndex unify(indexType, indexer.indexType, scope, expr.index->location); return indexer.indexResultType; } - else if (!FFlag::LuauDontExtendUnsealedRValueTables && (exprTable->state == 
TableState::Unsealed || exprTable->state == TableState::Free)) - { - TypeId resultType = freshType(exprTable->level); - exprTable->indexer = TableIndexer{anyIfNonstrict(indexType), anyIfNonstrict(resultType)}; - return resultType; - } - else if (FFlag::LuauDontExtendUnsealedRValueTables && - ((ctx == ValueContext::LValue && exprTable->state == TableState::Unsealed) || exprTable->state == TableState::Free)) + else if ((ctx == ValueContext::LValue && exprTable->state == TableState::Unsealed) || exprTable->state == TableState::Free) { TypeId indexerType = freshType(exprTable->level); unify(indexType, indexerType, scope, expr.location); @@ -3439,13 +3412,7 @@ TypeId TypeChecker::checkLValueBinding(const ScopePtr& scope, const AstExprIndex * has no indexer, we have no idea if it will work so we just return any * and hope for the best. */ - if (FFlag::LuauDontExtendUnsealedRValueTables) - return anyType; - else - { - TypeId resultType = freshType(scope); - return resultType; - } + return anyType; } } @@ -5997,7 +5964,7 @@ void TypeChecker::resolve(const TypeGuardPredicate& typeguardP, RefinementMap& r if (!typeguardP.isTypeof) return addRefinement(refis, typeguardP.lvalue, errorRecoveryType(scope)); - auto typeFun = globals.globalScope->lookupType(typeguardP.kind); + auto typeFun = globalScope->lookupType(typeguardP.kind); if (!typeFun || !typeFun->typeParams.empty() || !typeFun->typePackParams.empty()) return addRefinement(refis, typeguardP.lvalue, errorRecoveryType(scope)); diff --git a/Analysis/src/Unifier.cpp b/Analysis/src/Unifier.cpp index 5f01a606..b748d115 100644 --- a/Analysis/src/Unifier.cpp +++ b/Analysis/src/Unifier.cpp @@ -21,11 +21,9 @@ LUAU_FASTFLAGVARIABLE(LuauInstantiateInSubtyping, false) LUAU_FASTFLAGVARIABLE(LuauUninhabitedSubAnything2, false) LUAU_FASTFLAGVARIABLE(LuauMaintainScopesInUnifier, false) LUAU_FASTFLAGVARIABLE(LuauTransitiveSubtyping, false) -LUAU_FASTFLAGVARIABLE(LuauTinyUnifyNormalsFix, false) 
LUAU_FASTFLAG(LuauClassTypeVarsInSubstitution) LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution) LUAU_FASTFLAG(LuauNormalizeBlockedTypes) -LUAU_FASTFLAG(LuauNegatedFunctionTypes) LUAU_FASTFLAG(LuauNegatedClassTypes) LUAU_FASTFLAG(LuauNegatedTableTypes) @@ -615,8 +613,7 @@ void Unifier::tryUnify_(TypeId subTy, TypeId superTy, bool isFunctionCall, bool else if ((log.getMutable(superTy) || log.getMutable(superTy)) && log.getMutable(subTy)) tryUnifySingletons(subTy, superTy); - else if (auto ptv = get(superTy); - FFlag::LuauNegatedFunctionTypes && ptv && ptv->type == PrimitiveType::Function && get(subTy)) + else if (auto ptv = get(superTy); ptv && ptv->type == PrimitiveType::Function && get(subTy)) { // Ok. Do nothing. forall functions F, F <: function } @@ -1275,17 +1272,7 @@ void Unifier::tryUnifyNormalizedTypes( Unifier innerState = makeChildUnifier(); - if (FFlag::LuauTinyUnifyNormalsFix) - innerState.tryUnify(subTable, superTable); - else - { - if (get(superTable)) - innerState.tryUnifyWithMetatable(subTable, superTable, /* reversed */ false); - else if (get(subTable)) - innerState.tryUnifyWithMetatable(superTable, subTable, /* reversed */ true); - else - innerState.tryUnifyTables(subTable, superTable); - } + innerState.tryUnify(subTable, superTable); if (innerState.errors.empty()) { @@ -1304,7 +1291,7 @@ void Unifier::tryUnifyNormalizedTypes( { if (superNorm.functions.isNever()) return reportError(location, TypeMismatch{superTy, subTy, reason, error, mismatchContext()}); - for (TypeId superFun : *superNorm.functions.parts) + for (TypeId superFun : superNorm.functions.parts) { Unifier innerState = makeChildUnifier(); const FunctionType* superFtv = get(superFun); @@ -1343,7 +1330,7 @@ TypePackId Unifier::tryApplyOverloadedFunction(TypeId function, const Normalized std::optional result; const FunctionType* firstFun = nullptr; - for (TypeId overload : *overloads.parts) + for (TypeId overload : overloads.parts) { if (const FunctionType* ftv = get(overload)) { diff 
--git a/Ast/src/Lexer.cpp b/Ast/src/Lexer.cpp index dac3b95b..75b4fe30 100644 --- a/Ast/src/Lexer.cpp +++ b/Ast/src/Lexer.cpp @@ -6,8 +6,6 @@ #include -LUAU_FASTFLAGVARIABLE(LuauFixInterpStringMid, false) - namespace Luau { @@ -642,9 +640,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT } consume(); - Lexeme lexemeOutput(Location(start, position()), FFlag::LuauFixInterpStringMid ? formatType : Lexeme::InterpStringBegin, - &buffer[startOffset], offset - startOffset - 1); - return lexemeOutput; + return Lexeme(Location(start, position()), formatType, &buffer[startOffset], offset - startOffset - 1); } default: diff --git a/CodeGen/include/Luau/AddressA64.h b/CodeGen/include/Luau/AddressA64.h index 2c852046..2796ef70 100644 --- a/CodeGen/include/Luau/AddressA64.h +++ b/CodeGen/include/Luau/AddressA64.h @@ -3,6 +3,8 @@ #include "Luau/RegisterA64.h" +#include + namespace Luau { namespace CodeGen @@ -23,6 +25,10 @@ enum class AddressKindA64 : uint8_t struct AddressA64 { + // This is a little misleading since AddressA64 can encode offsets up to 1023*size where size depends on the load/store size + // For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB + static constexpr size_t kMaxOffset = 1023; + AddressA64(RegisterA64 base, int off = 0) : kind(AddressKindA64::imm) , base(base) @@ -30,7 +36,6 @@ struct AddressA64 , data(off) { LUAU_ASSERT(base.kind == KindA64::x || base == sp); - LUAU_ASSERT(off >= -256 && off < 4096); } AddressA64(RegisterA64 base, RegisterA64 offset) diff --git a/CodeGen/include/Luau/AssemblyBuilderA64.h b/CodeGen/include/Luau/AssemblyBuilderA64.h index 1190e975..0c738712 100644 --- a/CodeGen/include/Luau/AssemblyBuilderA64.h +++ b/CodeGen/include/Luau/AssemblyBuilderA64.h @@ -16,10 +16,15 @@ namespace CodeGen namespace A64 { +enum FeaturesA64 +{ + Feature_JSCVT = 1 << 0, +}; + class AssemblyBuilderA64 { public: - explicit 
AssemblyBuilderA64(bool logText); + explicit AssemblyBuilderA64(bool logText, unsigned int features = 0); ~AssemblyBuilderA64(); // Moves @@ -42,6 +47,7 @@ public: // Note: some arithmetic instructions also have versions that update flags (ADDS etc) but we aren't using them atm void cmp(RegisterA64 src1, RegisterA64 src2); void cmp(RegisterA64 src1, uint16_t src2); + void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond); // Bitwise // Note: shifted-register support and bitfield operations are omitted for simplicity @@ -93,6 +99,36 @@ public: // Address of code (label) void adr(RegisterA64 dst, Label& label); + // Floating-point scalar moves + void fmov(RegisterA64 dst, RegisterA64 src); + + // Floating-point scalar math + void fabs(RegisterA64 dst, RegisterA64 src); + void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + void fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + void fneg(RegisterA64 dst, RegisterA64 src); + void fsqrt(RegisterA64 dst, RegisterA64 src); + void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + + // Floating-point rounding and conversions + void frinta(RegisterA64 dst, RegisterA64 src); + void frintm(RegisterA64 dst, RegisterA64 src); + void frintp(RegisterA64 dst, RegisterA64 src); + void fcvtzs(RegisterA64 dst, RegisterA64 src); + void fcvtzu(RegisterA64 dst, RegisterA64 src); + void scvtf(RegisterA64 dst, RegisterA64 src); + void ucvtf(RegisterA64 dst, RegisterA64 src); + + // Floating-point conversion to integer using JS rules (wrap around 2^32) and set Z flag + // note: this is part of ARM8.3 (JSCVT feature); support of this instruction needs to be checked at runtime + void fjcvtzs(RegisterA64 dst, RegisterA64 src); + + // Floating-point comparisons + void fcmp(RegisterA64 src1, RegisterA64 src2); + void fcmpz(RegisterA64 src); + void fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 
cond); + // Run final checks bool finalize(); @@ -121,6 +157,7 @@ public: std::string text; const bool logText = false; + const unsigned int features = 0; // Maximum immediate argument to functions like add/sub/cmp static constexpr size_t kMaxImmediate = (1 << 12) - 1; @@ -134,13 +171,15 @@ private: void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op); void placeI12(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op); void placeI16(const char* name, RegisterA64 dst, int src, uint8_t op, int shift = 0); - void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint8_t op, uint8_t size); + void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint8_t op, uint8_t size, int sizelog); void placeBC(const char* name, Label& label, uint8_t op, uint8_t cond); void placeBCR(const char* name, Label& label, uint8_t op, RegisterA64 cond); void placeBR(const char* name, RegisterA64 src, uint32_t op); void placeADR(const char* name, RegisterA64 src, uint8_t op); void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label); - void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t size); + void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog); + void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc); + void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc); void place(uint32_t word); @@ -164,6 +203,7 @@ private: LUAU_NOINLINE void log(const char* opcode, RegisterA64 src, Label label); LUAU_NOINLINE void log(const char* opcode, RegisterA64 src); LUAU_NOINLINE void log(const char* opcode, Label label); + LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond); LUAU_NOINLINE void log(Label label); LUAU_NOINLINE void 
log(RegisterA64 reg); LUAU_NOINLINE void log(AddressA64 addr); diff --git a/CodeGen/include/Luau/AssemblyBuilderX64.h b/CodeGen/include/Luau/AssemblyBuilderX64.h index 17076ed6..2b2a849c 100644 --- a/CodeGen/include/Luau/AssemblyBuilderX64.h +++ b/CodeGen/include/Luau/AssemblyBuilderX64.h @@ -41,6 +41,7 @@ enum class ABIX64 class AssemblyBuilderX64 { public: + explicit AssemblyBuilderX64(bool logText, ABIX64 abi); explicit AssemblyBuilderX64(bool logText); ~AssemblyBuilderX64(); diff --git a/CodeGen/include/Luau/ConditionA64.h b/CodeGen/include/Luau/ConditionA64.h index 0beadad5..e94adbcf 100644 --- a/CodeGen/include/Luau/ConditionA64.h +++ b/CodeGen/include/Luau/ConditionA64.h @@ -8,28 +8,45 @@ namespace CodeGen namespace A64 { +// See Table C1-1 on page C1-229 of Arm ARM for A-profile architecture enum class ConditionA64 { + // EQ: integer (equal), floating-point (equal) Equal, + // NE: integer (not equal), floating-point (not equal or unordered) NotEqual, + // CS: integer (carry set), floating-point (greater than, equal or unordered) CarrySet, + // CC: integer (carry clear), floating-point (less than) CarryClear, + // MI: integer (negative), floating-point (less than) Minus, + // PL: integer (positive or zero), floating-point (greater than, equal or unordered) Plus, + // VS: integer (overflow), floating-point (unordered) Overflow, + // VC: integer (no overflow), floating-point (ordered) NoOverflow, + // HI: integer (unsigned higher), floating-point (greater than, or unordered) UnsignedGreater, + // LS: integer (unsigned lower or same), floating-point (less than or equal) UnsignedLessEqual, + // GE: integer (signed greater than or equal), floating-point (greater than or equal) GreaterEqual, + // LT: integer (signed less than), floating-point (less than, or unordered) Less, + + // GT: integer (signed greater than), floating-point (greater than) Greater, + // LE: integer (signed less than or equal), floating-point (less than, equal or unordered) LessEqual, + // AL: 
always Always, Count diff --git a/CodeGen/include/Luau/IrCallWrapperX64.h b/CodeGen/include/Luau/IrCallWrapperX64.h new file mode 100644 index 00000000..b70c8da6 --- /dev/null +++ b/CodeGen/include/Luau/IrCallWrapperX64.h @@ -0,0 +1,82 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#pragma once + +#include "Luau/AssemblyBuilderX64.h" +#include "Luau/IrData.h" +#include "Luau/OperandX64.h" +#include "Luau/RegisterX64.h" + +#include + +// TODO: call wrapper can be used to suggest target registers for ScopedRegX64 to compute data into argument registers directly + +namespace Luau +{ +namespace CodeGen +{ +namespace X64 +{ + +// When IrInst operands are used, current instruction index is required to track lifetime +// In all other calls it is ok to omit the argument +constexpr uint32_t kInvalidInstIdx = ~0u; + +struct IrRegAllocX64; +struct ScopedRegX64; + +struct CallArgument +{ + SizeX64 targetSize = SizeX64::none; + + OperandX64 source = noreg; + IrOp sourceOp; + + OperandX64 target = noreg; + bool candidate = true; +}; + +class IrCallWrapperX64 +{ +public: + IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx = kInvalidInstIdx); + + void addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp = {}); + void addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg); + + void call(const OperandX64& func); + + IrRegAllocX64& regs; + AssemblyBuilderX64& build; + uint32_t instIdx = ~0u; + +private: + void assignTargetRegisters(); + void countRegisterUses(); + CallArgument* findNonInterferingArgument(); + bool interferesWithOperand(const OperandX64& op, RegisterX64 reg) const; + bool interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const; + bool interferesWithActiveTarget(RegisterX64 sourceReg) const; + void moveToTarget(CallArgument& arg); + void freeSourceRegisters(CallArgument& arg); + void renameRegister(RegisterX64& target, 
RegisterX64 reg, RegisterX64 replacement); + void renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement); + RegisterX64 findConflictingTarget() const; + + int getRegisterUses(RegisterX64 reg) const; + void addRegisterUse(RegisterX64 reg); + void removeRegisterUse(RegisterX64 reg); + + static const int kMaxCallArguments = 6; + std::array args; + int argCount = 0; + + OperandX64 funcOp; + + // Internal counters for remaining register use counts + std::array gprUses; + std::array xmmUses; +}; + +} // namespace X64 +} // namespace CodeGen +} // namespace Luau diff --git a/CodeGen/include/Luau/IrData.h b/CodeGen/include/Luau/IrData.h index e8b2bc62..75216081 100644 --- a/CodeGen/include/Luau/IrData.h +++ b/CodeGen/include/Luau/IrData.h @@ -125,6 +125,26 @@ enum class IrCmd : uint8_t // A: double UNM_NUM, + // Round number to negative infinity (math.floor) + // A: double + FLOOR_NUM, + + // Round number to positive infinity (math.ceil) + // A: double + CEIL_NUM, + + // Round number to nearest integer number, rounding half-way cases away from zero (math.round) + // A: double + ROUND_NUM, + + // Get square root of the argument (math.sqrt) + // A: double + SQRT_NUM, + + // Get absolute value of the argument (math.abs) + // A: double + ABS_NUM, + // Compute Luau 'not' operation on destructured TValue // A: tag // B: double @@ -252,6 +272,7 @@ enum class IrCmd : uint8_t // A: Rn (where to store the result) // B: Rn (lhs) // C: Rn or Kn (rhs) + // D: int (TMS enum with arithmetic type) DO_ARITH, // Get length of a TValue of any type @@ -382,54 +403,53 @@ enum class IrCmd : uint8_t // C: Rn (source start) // D: int (count or -1 to assign values up to stack top) // E: unsigned int (table index to start from) - LOP_SETLIST, + SETLIST, // Call specified function // A: Rn (function, followed by arguments) // B: int (argument count or -1 to use all arguments up to stack top) // C: int (result count or -1 to preserve all results and adjust stack top) // Note: return values 
are placed starting from Rn specified in 'A' - LOP_CALL, + CALL, // Return specified values from the function // A: Rn (value start) // B: int (result count or -1 to return all values up to stack top) - LOP_RETURN, + RETURN, // Adjust loop variables for one iteration of a generic for loop, jump back to the loop header if loop needs to continue // A: Rn (loop variable start, updates Rn+2 and 'B' number of registers starting from Rn+3) // B: int (loop variable count, if more than 2, registers starting from Rn+5 are set to nil) // C: block (repeat) // D: block (exit) - LOP_FORGLOOP, + FORGLOOP, // Handle LOP_FORGLOOP fallback when variable being iterated is not a table - // A: unsigned int (bytecode instruction index) - // B: Rn (loop state start, updates Rn+2 and 'C' number of registers starting from Rn+3) - // C: int (loop variable count and a MSB set when it's an ipairs-like iteration loop) - // D: block (repeat) - // E: block (exit) - LOP_FORGLOOP_FALLBACK, + // A: Rn (loop state start, updates Rn+2 and 'B' number of registers starting from Rn+3) + // B: int (loop variable count and a MSB set when it's an ipairs-like iteration loop) + // C: block (repeat) + // D: block (exit) + FORGLOOP_FALLBACK, // Fallback for generic for loop preparation when iterating over builtin pairs/ipairs // It raises an error if 'B' register is not a function // A: unsigned int (bytecode instruction index) // B: Rn // C: block (forgloop location) - LOP_FORGPREP_XNEXT_FALLBACK, + FORGPREP_XNEXT_FALLBACK, // Perform `and` or `or` operation (selecting lhs or rhs based on whether the lhs is truthy) and put the result into target register // A: Rn (target) // B: Rn (lhs) // C: Rn or Kn (rhs) - LOP_AND, - LOP_ANDK, - LOP_OR, - LOP_ORK, + AND, + ANDK, + OR, + ORK, // Increment coverage data (saturating 24 bit add) // A: unsigned int (bytecode instruction index) - LOP_COVERAGE, + COVERAGE, // Operations that have a translation, but use a full instruction fallback @@ -676,6 +696,14 @@ struct 
IrFunction return instructions[op.index]; } + IrInst* asInstOp(IrOp op) + { + if (op.kind == IrOpKind::Inst) + return &instructions[op.index]; + + return nullptr; + } + IrConst& constOp(IrOp op) { LUAU_ASSERT(op.kind == IrOpKind::Constant); diff --git a/CodeGen/src/IrRegAllocX64.h b/CodeGen/include/Luau/IrRegAllocX64.h similarity index 85% rename from CodeGen/src/IrRegAllocX64.h rename to CodeGen/include/Luau/IrRegAllocX64.h index 497bb035..c2486faf 100644 --- a/CodeGen/src/IrRegAllocX64.h +++ b/CodeGen/include/Luau/IrRegAllocX64.h @@ -24,12 +24,17 @@ struct IrRegAllocX64 RegisterX64 allocGprRegOrReuse(SizeX64 preferredSize, uint32_t index, std::initializer_list oprefs); RegisterX64 allocXmmRegOrReuse(uint32_t index, std::initializer_list oprefs); - RegisterX64 takeGprReg(RegisterX64 reg); + RegisterX64 takeReg(RegisterX64 reg); void freeReg(RegisterX64 reg); void freeLastUseReg(IrInst& target, uint32_t index); void freeLastUseRegs(const IrInst& inst, uint32_t index); + bool isLastUseReg(const IrInst& target, uint32_t index) const; + + bool shouldFreeGpr(RegisterX64 reg) const; + + void assertFree(RegisterX64 reg) const; void assertAllFree() const; IrFunction& function; @@ -51,6 +56,8 @@ struct ScopedRegX64 void alloc(SizeX64 size); void free(); + RegisterX64 release(); + IrRegAllocX64& owner; RegisterX64 reg; }; diff --git a/CodeGen/include/Luau/IrUtils.h b/CodeGen/include/Luau/IrUtils.h index 0fc14025..6e73e47a 100644 --- a/CodeGen/include/Luau/IrUtils.h +++ b/CodeGen/include/Luau/IrUtils.h @@ -99,10 +99,10 @@ inline bool isBlockTerminator(IrCmd cmd) case IrCmd::JUMP_CMP_NUM: case IrCmd::JUMP_CMP_ANY: case IrCmd::JUMP_SLOT_MATCH: - case IrCmd::LOP_RETURN: - case IrCmd::LOP_FORGLOOP: - case IrCmd::LOP_FORGLOOP_FALLBACK: - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: + case IrCmd::RETURN: + case IrCmd::FORGLOOP: + case IrCmd::FORGLOOP_FALLBACK: + case IrCmd::FORGPREP_XNEXT_FALLBACK: case IrCmd::FALLBACK_FORGPREP: return true; default: @@ -137,6 +137,11 @@ inline bool 
hasResult(IrCmd cmd) case IrCmd::MIN_NUM: case IrCmd::MAX_NUM: case IrCmd::UNM_NUM: + case IrCmd::FLOOR_NUM: + case IrCmd::CEIL_NUM: + case IrCmd::ROUND_NUM: + case IrCmd::SQRT_NUM: + case IrCmd::ABS_NUM: case IrCmd::NOT_ANY: case IrCmd::TABLE_LEN: case IrCmd::NEW_TABLE: diff --git a/CodeGen/include/Luau/RegisterA64.h b/CodeGen/include/Luau/RegisterA64.h index 519e83fc..242e8b79 100644 --- a/CodeGen/include/Luau/RegisterA64.h +++ b/CodeGen/include/Luau/RegisterA64.h @@ -17,6 +17,8 @@ enum class KindA64 : uint8_t none, w, // 32-bit GPR x, // 64-bit GPR + d, // 64-bit SIMD&FP scalar + q, // 128-bit SIMD&FP vector }; struct RegisterA64 @@ -105,6 +107,72 @@ constexpr RegisterA64 xzr{KindA64::x, 31}; constexpr RegisterA64 sp{KindA64::none, 31}; +constexpr RegisterA64 d0{KindA64::d, 0}; +constexpr RegisterA64 d1{KindA64::d, 1}; +constexpr RegisterA64 d2{KindA64::d, 2}; +constexpr RegisterA64 d3{KindA64::d, 3}; +constexpr RegisterA64 d4{KindA64::d, 4}; +constexpr RegisterA64 d5{KindA64::d, 5}; +constexpr RegisterA64 d6{KindA64::d, 6}; +constexpr RegisterA64 d7{KindA64::d, 7}; +constexpr RegisterA64 d8{KindA64::d, 8}; +constexpr RegisterA64 d9{KindA64::d, 9}; +constexpr RegisterA64 d10{KindA64::d, 10}; +constexpr RegisterA64 d11{KindA64::d, 11}; +constexpr RegisterA64 d12{KindA64::d, 12}; +constexpr RegisterA64 d13{KindA64::d, 13}; +constexpr RegisterA64 d14{KindA64::d, 14}; +constexpr RegisterA64 d15{KindA64::d, 15}; +constexpr RegisterA64 d16{KindA64::d, 16}; +constexpr RegisterA64 d17{KindA64::d, 17}; +constexpr RegisterA64 d18{KindA64::d, 18}; +constexpr RegisterA64 d19{KindA64::d, 19}; +constexpr RegisterA64 d20{KindA64::d, 20}; +constexpr RegisterA64 d21{KindA64::d, 21}; +constexpr RegisterA64 d22{KindA64::d, 22}; +constexpr RegisterA64 d23{KindA64::d, 23}; +constexpr RegisterA64 d24{KindA64::d, 24}; +constexpr RegisterA64 d25{KindA64::d, 25}; +constexpr RegisterA64 d26{KindA64::d, 26}; +constexpr RegisterA64 d27{KindA64::d, 27}; +constexpr RegisterA64 
d28{KindA64::d, 28}; +constexpr RegisterA64 d29{KindA64::d, 29}; +constexpr RegisterA64 d30{KindA64::d, 30}; +constexpr RegisterA64 d31{KindA64::d, 31}; + +constexpr RegisterA64 q0{KindA64::q, 0}; +constexpr RegisterA64 q1{KindA64::q, 1}; +constexpr RegisterA64 q2{KindA64::q, 2}; +constexpr RegisterA64 q3{KindA64::q, 3}; +constexpr RegisterA64 q4{KindA64::q, 4}; +constexpr RegisterA64 q5{KindA64::q, 5}; +constexpr RegisterA64 q6{KindA64::q, 6}; +constexpr RegisterA64 q7{KindA64::q, 7}; +constexpr RegisterA64 q8{KindA64::q, 8}; +constexpr RegisterA64 q9{KindA64::q, 9}; +constexpr RegisterA64 q10{KindA64::q, 10}; +constexpr RegisterA64 q11{KindA64::q, 11}; +constexpr RegisterA64 q12{KindA64::q, 12}; +constexpr RegisterA64 q13{KindA64::q, 13}; +constexpr RegisterA64 q14{KindA64::q, 14}; +constexpr RegisterA64 q15{KindA64::q, 15}; +constexpr RegisterA64 q16{KindA64::q, 16}; +constexpr RegisterA64 q17{KindA64::q, 17}; +constexpr RegisterA64 q18{KindA64::q, 18}; +constexpr RegisterA64 q19{KindA64::q, 19}; +constexpr RegisterA64 q20{KindA64::q, 20}; +constexpr RegisterA64 q21{KindA64::q, 21}; +constexpr RegisterA64 q22{KindA64::q, 22}; +constexpr RegisterA64 q23{KindA64::q, 23}; +constexpr RegisterA64 q24{KindA64::q, 24}; +constexpr RegisterA64 q25{KindA64::q, 25}; +constexpr RegisterA64 q26{KindA64::q, 26}; +constexpr RegisterA64 q27{KindA64::q, 27}; +constexpr RegisterA64 q28{KindA64::q, 28}; +constexpr RegisterA64 q29{KindA64::q, 29}; +constexpr RegisterA64 q30{KindA64::q, 30}; +constexpr RegisterA64 q31{KindA64::q, 31}; + } // namespace A64 } // namespace CodeGen } // namespace Luau diff --git a/CodeGen/src/AssemblyBuilderA64.cpp b/CodeGen/src/AssemblyBuilderA64.cpp index bedd2740..a80003e9 100644 --- a/CodeGen/src/AssemblyBuilderA64.cpp +++ b/CodeGen/src/AssemblyBuilderA64.cpp @@ -21,8 +21,9 @@ static_assert(sizeof(textForCondition) / sizeof(textForCondition[0]) == size_t(C const unsigned kMaxAlign = 32; -AssemblyBuilderA64::AssemblyBuilderA64(bool logText) 
+AssemblyBuilderA64::AssemblyBuilderA64(bool logText, unsigned int features) : logText(logText) + , features(features) { data.resize(4096); dataPos = data.size(); // data is filled backwards @@ -39,6 +40,9 @@ AssemblyBuilderA64::~AssemblyBuilderA64() void AssemblyBuilderA64::mov(RegisterA64 dst, RegisterA64 src) { + LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x || dst == sp); + LUAU_ASSERT(dst.kind == src.kind || (dst.kind == KindA64::x && src == sp) || (dst == sp && src.kind == KindA64::x)); + if (dst == sp || src == sp) placeR1("mov", dst, src, 0b00'100010'0'000000000000); else @@ -115,6 +119,13 @@ void AssemblyBuilderA64::cmp(RegisterA64 src1, uint16_t src2) placeI12("cmp", dst, src1, src2, 0b11'10001); } +void AssemblyBuilderA64::csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond) +{ + LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w); + + placeCS("csel", dst, src1, src2, cond, 0b11010'10'0, 0b00); +} + void AssemblyBuilderA64::and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) { placeSR3("and", dst, src1, src2, 0b00'01010); @@ -157,54 +168,76 @@ void AssemblyBuilderA64::ror(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2 void AssemblyBuilderA64::clz(RegisterA64 dst, RegisterA64 src) { + LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x); + LUAU_ASSERT(dst.kind == src.kind); + placeR1("clz", dst, src, 0b10'11010110'00000'00010'0); } void AssemblyBuilderA64::rbit(RegisterA64 dst, RegisterA64 src) { + LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x); + LUAU_ASSERT(dst.kind == src.kind); + placeR1("rbit", dst, src, 0b10'11010110'00000'0000'00); } void AssemblyBuilderA64::ldr(RegisterA64 dst, AddressA64 src) { - LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w); + LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w || dst.kind == KindA64::d || dst.kind == KindA64::q); - placeA("ldr", dst, src, 0b11100001, 0b10 | uint8_t(dst.kind == KindA64::x)); + switch 
(dst.kind) + { + case KindA64::w: + placeA("ldr", dst, src, 0b11100001, 0b10, 2); + break; + case KindA64::x: + placeA("ldr", dst, src, 0b11100001, 0b11, 3); + break; + case KindA64::d: + placeA("ldr", dst, src, 0b11110001, 0b11, 3); + break; + case KindA64::q: + placeA("ldr", dst, src, 0b11110011, 0b00, 4); + break; + case KindA64::none: + LUAU_ASSERT(!"Unexpected register kind"); + } } void AssemblyBuilderA64::ldrb(RegisterA64 dst, AddressA64 src) { LUAU_ASSERT(dst.kind == KindA64::w); - placeA("ldrb", dst, src, 0b11100001, 0b00); + placeA("ldrb", dst, src, 0b11100001, 0b00, 2); } void AssemblyBuilderA64::ldrh(RegisterA64 dst, AddressA64 src) { LUAU_ASSERT(dst.kind == KindA64::w); - placeA("ldrh", dst, src, 0b11100001, 0b01); + placeA("ldrh", dst, src, 0b11100001, 0b01, 2); } void AssemblyBuilderA64::ldrsb(RegisterA64 dst, AddressA64 src) { LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w); - placeA("ldrsb", dst, src, 0b11100010 | uint8_t(dst.kind == KindA64::w), 0b00); + placeA("ldrsb", dst, src, 0b11100010 | uint8_t(dst.kind == KindA64::w), 0b00, 0); } void AssemblyBuilderA64::ldrsh(RegisterA64 dst, AddressA64 src) { LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w); - placeA("ldrsh", dst, src, 0b11100010 | uint8_t(dst.kind == KindA64::w), 0b01); + placeA("ldrsh", dst, src, 0b11100010 | uint8_t(dst.kind == KindA64::w), 0b01, 1); } void AssemblyBuilderA64::ldrsw(RegisterA64 dst, AddressA64 src) { LUAU_ASSERT(dst.kind == KindA64::x); - placeA("ldrsw", dst, src, 0b11100010, 0b10); + placeA("ldrsw", dst, src, 0b11100010, 0b10, 2); } void AssemblyBuilderA64::ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src) @@ -212,28 +245,44 @@ void AssemblyBuilderA64::ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src) LUAU_ASSERT(dst1.kind == KindA64::x || dst1.kind == KindA64::w); LUAU_ASSERT(dst1.kind == dst2.kind); - placeP("ldp", dst1, dst2, src, 0b101'0'010'1, 0b10 | uint8_t(dst1.kind == KindA64::x)); + placeP("ldp", dst1, dst2, src, 
0b101'0'010'1, uint8_t(dst1.kind == KindA64::x) << 1, dst1.kind == KindA64::x ? 3 : 2); } void AssemblyBuilderA64::str(RegisterA64 src, AddressA64 dst) { - LUAU_ASSERT(src.kind == KindA64::x || src.kind == KindA64::w); + LUAU_ASSERT(src.kind == KindA64::x || src.kind == KindA64::w || src.kind == KindA64::d || src.kind == KindA64::q); - placeA("str", src, dst, 0b11100000, 0b10 | uint8_t(src.kind == KindA64::x)); + switch (src.kind) + { + case KindA64::w: + placeA("str", src, dst, 0b11100000, 0b10, 2); + break; + case KindA64::x: + placeA("str", src, dst, 0b11100000, 0b11, 3); + break; + case KindA64::d: + placeA("str", src, dst, 0b11110000, 0b11, 3); + break; + case KindA64::q: + placeA("str", src, dst, 0b11110010, 0b00, 4); + break; + case KindA64::none: + LUAU_ASSERT(!"Unexpected register kind"); + } } void AssemblyBuilderA64::strb(RegisterA64 src, AddressA64 dst) { LUAU_ASSERT(src.kind == KindA64::w); - placeA("strb", src, dst, 0b11100000, 0b00); + placeA("strb", src, dst, 0b11100000, 0b00, 2); } void AssemblyBuilderA64::strh(RegisterA64 src, AddressA64 dst) { LUAU_ASSERT(src.kind == KindA64::w); - placeA("strh", src, dst, 0b11100000, 0b01); + placeA("strh", src, dst, 0b11100000, 0b01, 2); } void AssemblyBuilderA64::stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst) @@ -241,7 +290,7 @@ void AssemblyBuilderA64::stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst) LUAU_ASSERT(src1.kind == KindA64::x || src1.kind == KindA64::w); LUAU_ASSERT(src1.kind == src2.kind); - placeP("stp", src1, src2, dst, 0b101'0'010'0, 0b10 | uint8_t(src1.kind == KindA64::x)); + placeP("stp", src1, src2, dst, 0b101'0'010'0, uint8_t(src1.kind == KindA64::x) << 1, src1.kind == KindA64::x ? 
3 : 2); } void AssemblyBuilderA64::b(Label& label) @@ -318,6 +367,145 @@ void AssemblyBuilderA64::adr(RegisterA64 dst, Label& label) placeADR("adr", dst, 0b10000, label); } +void AssemblyBuilderA64::fmov(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d); + + placeR1("fmov", dst, src, 0b000'11110'01'1'0000'00'10000); +} + +void AssemblyBuilderA64::fabs(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d); + + placeR1("fabs", dst, src, 0b000'11110'01'1'0000'01'10000); +} + +void AssemblyBuilderA64::fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src1.kind == KindA64::d && src2.kind == KindA64::d); + + placeR3("fadd", dst, src1, src2, 0b11110'01'1, 0b0010'10); +} + +void AssemblyBuilderA64::fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src1.kind == KindA64::d && src2.kind == KindA64::d); + + placeR3("fdiv", dst, src1, src2, 0b11110'01'1, 0b0001'10); +} + +void AssemblyBuilderA64::fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src1.kind == KindA64::d && src2.kind == KindA64::d); + + placeR3("fmul", dst, src1, src2, 0b11110'01'1, 0b0000'10); +} + +void AssemblyBuilderA64::fneg(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d); + + placeR1("fneg", dst, src, 0b000'11110'01'1'0000'10'10000); +} + +void AssemblyBuilderA64::fsqrt(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d); + + placeR1("fsqrt", dst, src, 0b000'11110'01'1'0000'11'10000); +} + +void AssemblyBuilderA64::fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src1.kind == KindA64::d && src2.kind == KindA64::d); + + placeR3("fsub", dst, src1, src2, 0b11110'01'1, 0b0011'10); +} + +void 
AssemblyBuilderA64::frinta(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d); + + placeR1("frinta", dst, src, 0b000'11110'01'1'001'100'10000); +} + +void AssemblyBuilderA64::frintm(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d); + + placeR1("frintm", dst, src, 0b000'11110'01'1'001'010'10000); +} + +void AssemblyBuilderA64::frintp(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d && src.kind == KindA64::d); + + placeR1("frintp", dst, src, 0b000'11110'01'1'001'001'10000); +} + +void AssemblyBuilderA64::fcvtzs(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x); + LUAU_ASSERT(src.kind == KindA64::d); + + placeR1("fcvtzs", dst, src, 0b000'11110'01'1'11'000'000000); +} + +void AssemblyBuilderA64::fcvtzu(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x); + LUAU_ASSERT(src.kind == KindA64::d); + + placeR1("fcvtzu", dst, src, 0b000'11110'01'1'11'001'000000); +} + +void AssemblyBuilderA64::scvtf(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d); + LUAU_ASSERT(src.kind == KindA64::w || src.kind == KindA64::x); + + placeR1("scvtf", dst, src, 0b000'11110'01'1'00'010'000000); +} + +void AssemblyBuilderA64::ucvtf(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::d); + LUAU_ASSERT(src.kind == KindA64::w || src.kind == KindA64::x); + + placeR1("ucvtf", dst, src, 0b000'11110'01'1'00'011'000000); +} + +void AssemblyBuilderA64::fjcvtzs(RegisterA64 dst, RegisterA64 src) +{ + LUAU_ASSERT(dst.kind == KindA64::w); + LUAU_ASSERT(src.kind == KindA64::d); + LUAU_ASSERT(features & Feature_JSCVT); + + placeR1("fjcvtzs", dst, src, 0b000'11110'01'1'11'110'000000); +} + +void AssemblyBuilderA64::fcmp(RegisterA64 src1, RegisterA64 src2) +{ + LUAU_ASSERT(src1.kind == KindA64::d && src2.kind == KindA64::d); + + 
placeFCMP("fcmp", src1, src2, 0b11110'01'1, 0b00); +} + +void AssemblyBuilderA64::fcmpz(RegisterA64 src) +{ + LUAU_ASSERT(src.kind == KindA64::d); + + placeFCMP("fcmp", src, RegisterA64{src.kind, 0}, 0b11110'01'1, 0b01); +} + +void AssemblyBuilderA64::fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond) +{ + LUAU_ASSERT(dst.kind == KindA64::d); + + placeCS("fcsel", dst, src1, src2, cond, 0b11110'01'1, 0b11); +} + bool AssemblyBuilderA64::finalize() { code.resize(codePos - code.data()); @@ -429,7 +617,7 @@ void AssemblyBuilderA64::placeR3(const char* name, RegisterA64 dst, RegisterA64 if (logText) log(name, dst, src1, src2); - LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x); + LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x || dst.kind == KindA64::d); LUAU_ASSERT(dst.kind == src1.kind && dst.kind == src2.kind); uint32_t sf = (dst.kind == KindA64::x) ? 0x80000000 : 0; @@ -443,10 +631,7 @@ void AssemblyBuilderA64::placeR1(const char* name, RegisterA64 dst, RegisterA64 if (logText) log(name, dst, src); - LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x || dst == sp); - LUAU_ASSERT(dst.kind == src.kind || (dst.kind == KindA64::x && src == sp) || (dst == sp && src.kind == KindA64::x)); - - uint32_t sf = (dst.kind != KindA64::w) ? 0x80000000 : 0; + uint32_t sf = (dst.kind == KindA64::x || src.kind == KindA64::x) ? 
0x80000000 : 0; place(dst.index | (src.index << 5) | (op << 10) | sf); commit(); @@ -482,7 +667,7 @@ void AssemblyBuilderA64::placeI16(const char* name, RegisterA64 dst, int src, ui commit(); } -void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 src, uint8_t op, uint8_t size) +void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 src, uint8_t op, uint8_t size, int sizelog) { if (logText) log(name, dst, src); @@ -490,8 +675,8 @@ void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 sr switch (src.kind) { case AddressKindA64::imm: - if (src.data >= 0 && src.data % (1 << size) == 0) - place(dst.index | (src.base.index << 5) | ((src.data >> size) << 10) | (op << 22) | (1 << 24) | (size << 30)); + if (src.data >= 0 && (src.data >> sizelog) < 1024 && (src.data & ((1 << sizelog) - 1)) == 0) + place(dst.index | (src.base.index << 5) | ((src.data >> sizelog) << 10) | (op << 22) | (1 << 24) | (size << 30)); else if (src.data >= -256 && src.data <= 255) place(dst.index | (src.base.index << 5) | ((src.data & ((1 << 9) - 1)) << 12) | (op << 22) | (size << 30)); else @@ -566,16 +751,45 @@ void AssemblyBuilderA64::placeADR(const char* name, RegisterA64 dst, uint8_t op, log(name, dst, label); } -void AssemblyBuilderA64::placeP(const char* name, RegisterA64 src1, RegisterA64 src2, AddressA64 dst, uint8_t op, uint8_t size) +void AssemblyBuilderA64::placeP(const char* name, RegisterA64 src1, RegisterA64 src2, AddressA64 dst, uint8_t op, uint8_t opc, int sizelog) { if (logText) log(name, src1, src2, dst); LUAU_ASSERT(dst.kind == AddressKindA64::imm); - LUAU_ASSERT(dst.data >= -128 * (1 << size) && dst.data <= 127 * (1 << size)); - LUAU_ASSERT(dst.data % (1 << size) == 0); + LUAU_ASSERT(dst.data >= -128 * (1 << sizelog) && dst.data <= 127 * (1 << sizelog)); + LUAU_ASSERT(dst.data % (1 << sizelog) == 0); - place(src1.index | (dst.base.index << 5) | (src2.index << 10) | (((dst.data >> size) & 127) << 15) | (op << 
22) | (size << 31)); + place(src1.index | (dst.base.index << 5) | (src2.index << 10) | (((dst.data >> sizelog) & 127) << 15) | (op << 22) | (opc << 30)); + commit(); +} + +void AssemblyBuilderA64::placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc) +{ + if (logText) + log(name, dst, src1, src2, cond); + + LUAU_ASSERT(dst.kind == src1.kind && dst.kind == src2.kind); + + uint32_t sf = (dst.kind == KindA64::x) ? 0x80000000 : 0; + + place(dst.index | (src1.index << 5) | (opc << 10) | (codeForCondition[int(cond)] << 12) | (src2.index << 16) | (op << 21) | sf); + commit(); +} + +void AssemblyBuilderA64::placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc) +{ + if (logText) + { + if (opc) + log(name, src1, 0); + else + log(name, src1, src2); + } + + LUAU_ASSERT(src1.kind == src2.kind); + + place((opc << 3) | (src1.index << 5) | (0b1000 << 10) | (src2.index << 16) | (op << 21)); commit(); } @@ -747,6 +961,19 @@ void AssemblyBuilderA64::log(const char* opcode, Label label) logAppend(" %-12s.L%d\n", opcode, label.id); } +void AssemblyBuilderA64::log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond) +{ + logAppend(" %-12s", opcode); + log(dst); + text.append(","); + log(src1); + text.append(","); + log(src2); + text.append(","); + text.append(textForCondition[int(cond)] + 2); // skip b. 
+ text.append("\n"); +} + void AssemblyBuilderA64::log(Label label) { logAppend(".L%d:\n", label.id); @@ -770,6 +997,14 @@ void AssemblyBuilderA64::log(RegisterA64 reg) logAppend("x%d", reg.index); break; + case KindA64::d: + logAppend("d%d", reg.index); + break; + + case KindA64::q: + logAppend("q%d", reg.index); + break; + case KindA64::none: if (reg.index == 31) text.append("sp"); diff --git a/CodeGen/src/AssemblyBuilderX64.cpp b/CodeGen/src/AssemblyBuilderX64.cpp index bf7889b8..0285c2a1 100644 --- a/CodeGen/src/AssemblyBuilderX64.cpp +++ b/CodeGen/src/AssemblyBuilderX64.cpp @@ -71,9 +71,9 @@ static ABIX64 getCurrentX64ABI() #endif } -AssemblyBuilderX64::AssemblyBuilderX64(bool logText) +AssemblyBuilderX64::AssemblyBuilderX64(bool logText, ABIX64 abi) : logText(logText) - , abi(getCurrentX64ABI()) + , abi(abi) { data.resize(4096); dataPos = data.size(); // data is filled backwards @@ -83,6 +83,11 @@ AssemblyBuilderX64::AssemblyBuilderX64(bool logText) codeEnd = code.data() + code.size(); } +AssemblyBuilderX64::AssemblyBuilderX64(bool logText) + : AssemblyBuilderX64(logText, getCurrentX64ABI()) +{ +} + AssemblyBuilderX64::~AssemblyBuilderX64() { LUAU_ASSERT(finalized); diff --git a/CodeGen/src/CodeGen.cpp b/CodeGen/src/CodeGen.cpp index 5ef5ba64..b0cc8d9c 100644 --- a/CodeGen/src/CodeGen.cpp +++ b/CodeGen/src/CodeGen.cpp @@ -43,6 +43,12 @@ #endif #endif +#if defined(__aarch64__) +#ifdef __APPLE__ +#include <sys/sysctl.h> +#endif +#endif + LUAU_FASTFLAGVARIABLE(DebugCodegenNoOpt, false) namespace Luau @@ -209,7 +215,7 @@ static void lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& } } -[[maybe_unused]] static void lowerIr( +[[maybe_unused]] static bool lowerIr( X64::AssemblyBuilderX64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options) { constexpr uint32_t kFunctionAlignment = 32; @@ -221,31 +227,21 @@ static void lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& X64::IrLoweringX64 
lowering(build, helpers, data, ir.function); lowerImpl(build, lowering, ir.function, proto->bytecodeid, options); + + return true; } -[[maybe_unused]] static void lowerIr( +[[maybe_unused]] static bool lowerIr( A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options) { - if (A64::IrLoweringA64::canLower(ir.function)) - { - A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function); + if (!A64::IrLoweringA64::canLower(ir.function)) + return false; - lowerImpl(build, lowering, ir.function, proto->bytecodeid, options); - } - else - { - // TODO: This is only needed while we don't support all IR opcodes - // When we can't translate some parts of the function, we instead encode a dummy assembly sequence that hands off control to VM - // In the future we could return nullptr from assembleFunction and handle it because there may be other reasons for why we refuse to assemble. - Label start = build.setLabel(); + A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function); - build.mov(A64::x0, 1); // finish function in VM - build.ldr(A64::x1, A64::mem(A64::rNativeContext, offsetof(NativeContext, gateExit))); - build.br(A64::x1); + lowerImpl(build, lowering, ir.function, proto->bytecodeid, options); - for (int i = 0; i < proto->sizecode; i++) - ir.function.bcMapping[i].asmLocation = build.getLabelOffset(start); - } + return true; } template @@ -289,7 +285,13 @@ static NativeProto* assembleFunction(AssemblyBuilder& build, NativeState& data, constPropInBlockChains(ir); } - lowerIr(build, ir, data, helpers, proto, options); + if (!lowerIr(build, ir, data, helpers, proto, options)) + { + if (build.logText) + build.logAppend("; skipping (can't lower)\n\n"); + + return nullptr; + } if (build.logText) build.logAppend("\n"); @@ -345,6 +347,22 @@ static void onSetBreakpoint(lua_State* L, Proto* proto, int instruction) LUAU_ASSERT(!"native breakpoints are not implemented"); } +#if 
defined(__aarch64__) +static unsigned int getCpuFeaturesA64() +{ + unsigned int result = 0; + +#ifdef __APPLE__ + int jscvt = 0; + size_t jscvtLen = sizeof(jscvt); + if (sysctlbyname("hw.optional.arm.FEAT_JSCVT", &jscvt, &jscvtLen, nullptr, 0) == 0 && jscvt == 1) + result |= A64::Feature_JSCVT; +#endif + + return result; +} +#endif + bool isSupported() { #if !LUA_CUSTOM_EXECUTION @@ -374,8 +392,20 @@ bool isSupported() return true; #elif defined(__aarch64__) + if (LUA_EXTRA_SIZE != 1) + return false; + + if (sizeof(TValue) != 16) + return false; + + if (sizeof(LuaNode) != 32) + return false; + // TODO: A64 codegen does not generate correct unwind info at the moment so it requires longjmp instead of C++ exceptions - return bool(LUA_USE_LONGJMP); + if (!LUA_USE_LONGJMP) + return false; + + return true; #else return false; #endif @@ -447,7 +477,7 @@ void compile(lua_State* L, int idx) return; #if defined(__aarch64__) - A64::AssemblyBuilderA64 build(/* logText= */ false); + A64::AssemblyBuilderA64 build(/* logText= */ false, getCpuFeaturesA64()); #else X64::AssemblyBuilderX64 build(/* logText= */ false); #endif @@ -470,10 +500,15 @@ void compile(lua_State* L, int idx) // Skip protos that have been compiled during previous invocations of CodeGen::compile for (Proto* p : protos) if (p && getProtoExecData(p) == nullptr) - results.push_back(assembleFunction(build, *data, helpers, p, {})); + if (NativeProto* np = assembleFunction(build, *data, helpers, p, {})) + results.push_back(np); build.finalize(); + // If no functions were assembled, we don't need to allocate/copy executable pages for helpers + if (results.empty()) + return; + uint8_t* nativeData = nullptr; size_t sizeNativeData = 0; uint8_t* codeStart = nullptr; @@ -507,7 +542,7 @@ std::string getAssembly(lua_State* L, int idx, AssemblyOptions options) const TValue* func = luaA_toobject(L, idx); #if defined(__aarch64__) - A64::AssemblyBuilderA64 build(/* logText= */ options.includeAssembly); + A64::AssemblyBuilderA64 
build(/* logText= */ options.includeAssembly, getCpuFeaturesA64()); #else X64::AssemblyBuilderX64 build(/* logText= */ options.includeAssembly); #endif @@ -527,10 +562,8 @@ std::string getAssembly(lua_State* L, int idx, AssemblyOptions options) for (Proto* p : protos) if (p) - { - NativeProto* nativeProto = assembleFunction(build, data, helpers, p, options); - destroyNativeProto(nativeProto); - } + if (NativeProto* np = assembleFunction(build, data, helpers, p, options)) + destroyNativeProto(np); build.finalize(); diff --git a/CodeGen/src/CodeGenA64.cpp b/CodeGen/src/CodeGenA64.cpp index 028b3327..e7a1e2e2 100644 --- a/CodeGen/src/CodeGenA64.cpp +++ b/CodeGen/src/CodeGenA64.cpp @@ -100,6 +100,16 @@ void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers) build.logAppend("; exitNoContinueVm\n"); helpers.exitNoContinueVm = build.setLabel(); emitExit(build, /* continueInVm */ false); + + if (build.logText) + build.logAppend("; reentry\n"); + helpers.reentry = build.setLabel(); + emitReentry(build, helpers); + + if (build.logText) + build.logAppend("; interrupt\n"); + helpers.interrupt = build.setLabel(); + emitInterrupt(build); } } // namespace A64 diff --git a/CodeGen/src/CodeGenUtils.cpp b/CodeGen/src/CodeGenUtils.cpp index 26568c30..ae3dbd45 100644 --- a/CodeGen/src/CodeGenUtils.cpp +++ b/CodeGen/src/CodeGenUtils.cpp @@ -126,7 +126,89 @@ void callEpilogC(lua_State* L, int nresults, int n) L->top = (nresults == LUA_MULTRET) ? 
res : cip->top; } -const Instruction* returnFallback(lua_State* L, StkId ra, int n) +// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts/savedpc +Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults) +{ + // slow-path: not a function call + if (LUAU_UNLIKELY(!ttisfunction(ra))) + { + luaV_tryfuncTM(L, ra); + argtop++; // __call adds an extra self + } + + Closure* ccl = clvalue(ra); + + CallInfo* ci = incr_ci(L); + ci->func = ra; + ci->base = ra + 1; + ci->top = argtop + ccl->stacksize; // note: technically UB since we haven't reallocated the stack yet + ci->savedpc = NULL; + ci->flags = 0; + ci->nresults = nresults; + + L->base = ci->base; + L->top = argtop; + + // note: this reallocs stack, but we don't need to VM_PROTECT this + // this is because we're going to modify base/savedpc manually anyhow + // crucially, we can't use ra/argtop after this line + luaD_checkstack(L, ccl->stacksize); + + LUAU_ASSERT(ci->top <= L->stack_last); + + if (!ccl->isC) + { + Proto* p = ccl->l.p; + + // fill unused parameters with nil + StkId argi = L->top; + StkId argend = L->base + p->numparams; + while (argi < argend) + setnilvalue(argi++); // complete missing arguments + L->top = p->is_vararg ? 
argi : ci->top; + + // keep executing new function + ci->savedpc = p->code; + return ccl; + } + else + { + lua_CFunction func = ccl->c.f; + int n = func(L); + + // yield + if (n < 0) + return NULL; + + // ci is our callinfo, cip is our parent + CallInfo* ci = L->ci; + CallInfo* cip = ci - 1; + + // copy return values into parent stack (but only up to nresults!), fill the rest with nil + // note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally + StkId res = ci->func; + StkId vali = L->top - n; + StkId valend = L->top; + + int i; + for (i = nresults; i != 0 && vali < valend; i--) + setobj2s(L, res++, vali++); + while (i-- > 0) + setnilvalue(res++); + + // pop the stack frame + L->ci = cip; + L->base = cip->base; + L->top = (nresults == LUA_MULTRET) ? res : cip->top; + + // keep executing current function + LUAU_ASSERT(isLua(cip)); + return clvalue(cip->func); + } +} + +// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts +Closure* returnFallback(lua_State* L, StkId ra, int n) { // ci is our callinfo, cip is our parent CallInfo* ci = L->ci; @@ -159,8 +241,9 @@ const Instruction* returnFallback(lua_State* L, StkId ra, int n) return NULL; } + // keep executing new function LUAU_ASSERT(isLua(cip)); - return cip->savedpc; + return clvalue(cip->func); } } // namespace CodeGen diff --git a/CodeGen/src/CodeGenUtils.h b/CodeGen/src/CodeGenUtils.h index 5d37bfd1..6066a691 100644 --- a/CodeGen/src/CodeGenUtils.h +++ b/CodeGen/src/CodeGenUtils.h @@ -16,7 +16,8 @@ void forgPrepXnextFallback(lua_State* L, TValue* ra, int pc); Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults); void callEpilogC(lua_State* L, int nresults, int n); -const Instruction* returnFallback(lua_State* L, StkId ra, int n); +Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults); +Closure* returnFallback(lua_State* L, StkId ra, int n); } // namespace CodeGen } // namespace 
Luau diff --git a/CodeGen/src/EmitBuiltinsX64.cpp b/CodeGen/src/EmitBuiltinsX64.cpp index d70b6ed8..2e745cbf 100644 --- a/CodeGen/src/EmitBuiltinsX64.cpp +++ b/CodeGen/src/EmitBuiltinsX64.cpp @@ -3,9 +3,10 @@ #include "Luau/AssemblyBuilderX64.h" #include "Luau/Bytecode.h" +#include "Luau/IrCallWrapperX64.h" +#include "Luau/IrRegAllocX64.h" #include "EmitCommonX64.h" -#include "IrRegAllocX64.h" #include "NativeState.h" #include "lstate.h" @@ -19,40 +20,11 @@ namespace CodeGen namespace X64 { -void emitBuiltinMathFloor(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) -{ - ScopedRegX64 tmp{regs, SizeX64::xmmword}; - build.vroundsd(tmp.reg, tmp.reg, luauRegValue(arg), RoundingModeX64::RoundToNegativeInfinity); - build.vmovsd(luauRegValue(ra), tmp.reg); -} - -void emitBuiltinMathCeil(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) -{ - ScopedRegX64 tmp{regs, SizeX64::xmmword}; - build.vroundsd(tmp.reg, tmp.reg, luauRegValue(arg), RoundingModeX64::RoundToPositiveInfinity); - build.vmovsd(luauRegValue(ra), tmp.reg); -} - -void emitBuiltinMathSqrt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) -{ - ScopedRegX64 tmp{regs, SizeX64::xmmword}; - build.vsqrtsd(tmp.reg, tmp.reg, luauRegValue(arg)); - build.vmovsd(luauRegValue(ra), tmp.reg); -} - -void emitBuiltinMathAbs(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) -{ - ScopedRegX64 tmp{regs, SizeX64::xmmword}; - build.vmovsd(tmp.reg, luauRegValue(arg)); - build.vandpd(tmp.reg, tmp.reg, build.i64(~(1LL << 63))); - build.vmovsd(luauRegValue(ra), tmp.reg); -} - static void emitBuiltinMathSingleArgFunc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg, int32_t offset) { - regs.assertAllFree(); - build.vmovsd(xmm0, luauRegValue(arg)); - build.call(qword[rNativeContext + 
offset]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg)); + callWrap.call(qword[rNativeContext + offset]); build.vmovsd(luauRegValue(ra), xmm0); } @@ -64,20 +36,10 @@ void emitBuiltinMathExp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int npar void emitBuiltinMathFmod(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) { - regs.assertAllFree(); - build.vmovsd(xmm0, luauRegValue(arg)); - build.vmovsd(xmm1, qword[args + offsetof(TValue, value)]); - build.call(qword[rNativeContext + offsetof(NativeContext, libm_fmod)]); - - build.vmovsd(luauRegValue(ra), xmm0); -} - -void emitBuiltinMathPow(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) -{ - regs.assertAllFree(); - build.vmovsd(xmm0, luauRegValue(arg)); - build.vmovsd(xmm1, qword[args + offsetof(TValue, value)]); - build.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg)); + callWrap.addArgument(SizeX64::xmmword, qword[args + offsetof(TValue, value)]); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_fmod)]); build.vmovsd(luauRegValue(ra), xmm0); } @@ -129,10 +91,10 @@ void emitBuiltinMathTanh(IrRegAllocX64& regs, AssemblyBuilderX64& build, int npa void emitBuiltinMathAtan2(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) { - regs.assertAllFree(); - build.vmovsd(xmm0, luauRegValue(arg)); - build.vmovsd(xmm1, qword[args + offsetof(TValue, value)]); - build.call(qword[rNativeContext + offsetof(NativeContext, libm_atan2)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg)); + callWrap.addArgument(SizeX64::xmmword, qword[args + offsetof(TValue, value)]); + callWrap.call(qword[rNativeContext + 
offsetof(NativeContext, libm_atan2)]); build.vmovsd(luauRegValue(ra), xmm0); } @@ -194,46 +156,23 @@ void emitBuiltinMathLog(IrRegAllocX64& regs, AssemblyBuilderX64& build, int npar void emitBuiltinMathLdexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) { - regs.assertAllFree(); - build.vmovsd(xmm0, luauRegValue(arg)); + ScopedRegX64 tmp{regs, SizeX64::qword}; + build.vcvttsd2si(tmp.reg, qword[args + offsetof(TValue, value)]); - if (build.abi == ABIX64::Windows) - build.vcvttsd2si(rArg2, qword[args + offsetof(TValue, value)]); - else - build.vcvttsd2si(rArg1, qword[args + offsetof(TValue, value)]); - - build.call(qword[rNativeContext + offsetof(NativeContext, libm_ldexp)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg)); + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_ldexp)]); build.vmovsd(luauRegValue(ra), xmm0); } -void emitBuiltinMathRound(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) -{ - ScopedRegX64 tmp0{regs, SizeX64::xmmword}; - ScopedRegX64 tmp1{regs, SizeX64::xmmword}; - ScopedRegX64 tmp2{regs, SizeX64::xmmword}; - - build.vmovsd(tmp0.reg, luauRegValue(arg)); - build.vandpd(tmp1.reg, tmp0.reg, build.f64x2(-0.0, -0.0)); - build.vmovsd(tmp2.reg, build.i64(0x3fdfffffffffffff)); // 0.49999999999999994 - build.vorpd(tmp1.reg, tmp1.reg, tmp2.reg); - build.vaddsd(tmp0.reg, tmp0.reg, tmp1.reg); - build.vroundsd(tmp0.reg, tmp0.reg, tmp0.reg, RoundingModeX64::RoundToZero); - - build.vmovsd(luauRegValue(ra), tmp0.reg); -} - void emitBuiltinMathFrexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) { - regs.assertAllFree(); - build.vmovsd(xmm0, luauRegValue(arg)); - - if (build.abi == ABIX64::Windows) - build.lea(rArg2, sTemporarySlot); - else - build.lea(rArg1, 
sTemporarySlot); - - build.call(qword[rNativeContext + offsetof(NativeContext, libm_frexp)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg)); + callWrap.addArgument(SizeX64::qword, sTemporarySlot); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_frexp)]); build.vmovsd(luauRegValue(ra), xmm0); @@ -243,15 +182,10 @@ void emitBuiltinMathFrexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int np void emitBuiltinMathModf(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) { - regs.assertAllFree(); - build.vmovsd(xmm0, luauRegValue(arg)); - - if (build.abi == ABIX64::Windows) - build.lea(rArg2, sTemporarySlot); - else - build.lea(rArg1, sTemporarySlot); - - build.call(qword[rNativeContext + offsetof(NativeContext, libm_modf)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg)); + callWrap.addArgument(SizeX64::qword, sTemporarySlot); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_modf)]); build.vmovsd(xmm1, qword[sTemporarySlot + 0]); build.vmovsd(luauRegValue(ra), xmm1); @@ -301,12 +235,10 @@ void emitBuiltinType(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams void emitBuiltinTypeof(IrRegAllocX64& regs, AssemblyBuilderX64& build, int nparams, int ra, int arg, OperandX64 args, int nresults) { - regs.assertAllFree(); - - build.mov(rArg1, rState); - build.lea(rArg2, luauRegAddress(arg)); - - build.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, luauRegAddress(arg)); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]); build.mov(luauRegValue(ra), rax); } @@ -328,22 +260,18 @@ void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int r case 
LBF_MATH_MIN: case LBF_MATH_MAX: case LBF_MATH_CLAMP: + case LBF_MATH_FLOOR: + case LBF_MATH_CEIL: + case LBF_MATH_SQRT: + case LBF_MATH_POW: + case LBF_MATH_ABS: + case LBF_MATH_ROUND: // These instructions are fully translated to IR break; - case LBF_MATH_FLOOR: - return emitBuiltinMathFloor(regs, build, nparams, ra, arg, argsOp, nresults); - case LBF_MATH_CEIL: - return emitBuiltinMathCeil(regs, build, nparams, ra, arg, argsOp, nresults); - case LBF_MATH_SQRT: - return emitBuiltinMathSqrt(regs, build, nparams, ra, arg, argsOp, nresults); - case LBF_MATH_ABS: - return emitBuiltinMathAbs(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_EXP: return emitBuiltinMathExp(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_FMOD: return emitBuiltinMathFmod(regs, build, nparams, ra, arg, argsOp, nresults); - case LBF_MATH_POW: - return emitBuiltinMathPow(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_ASIN: return emitBuiltinMathAsin(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_SIN: @@ -370,8 +298,6 @@ void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int r return emitBuiltinMathLog(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_LDEXP: return emitBuiltinMathLdexp(regs, build, nparams, ra, arg, argsOp, nresults); - case LBF_MATH_ROUND: - return emitBuiltinMathRound(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_FREXP: return emitBuiltinMathFrexp(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_MODF: diff --git a/CodeGen/src/EmitCommon.h b/CodeGen/src/EmitCommon.h index 3c41c271..a71eafd4 100644 --- a/CodeGen/src/EmitCommon.h +++ b/CodeGen/src/EmitCommon.h @@ -20,9 +20,16 @@ constexpr unsigned kOffsetOfInstructionC = 3; // Leaf functions that are placed in every module to perform common instruction sequences struct ModuleHelpers { + // A64/X64 Label exitContinueVm; Label exitNoContinueVm; + + // X64 Label continueCallInVm; + + // A64 + 
Label reentry; // x0: closure + Label interrupt; // x0: pc offset, x1: return address, x2: interrupt }; } // namespace CodeGen diff --git a/CodeGen/src/EmitCommonA64.cpp b/CodeGen/src/EmitCommonA64.cpp index 66810d37..2b4bbaba 100644 --- a/CodeGen/src/EmitCommonA64.cpp +++ b/CodeGen/src/EmitCommonA64.cpp @@ -11,6 +11,11 @@ namespace CodeGen namespace A64 { +void emitUpdateBase(AssemblyBuilderA64& build) +{ + build.ldr(rBase, mem(rState, offsetof(lua_State, base))); +} + void emitExit(AssemblyBuilderA64& build, bool continueInVm) { build.mov(x0, continueInVm); @@ -18,56 +23,82 @@ void emitExit(AssemblyBuilderA64& build, bool continueInVm) build.br(x1); } -void emitUpdateBase(AssemblyBuilderA64& build) +void emitInterrupt(AssemblyBuilderA64& build) { - build.ldr(rBase, mem(rState, offsetof(lua_State, base))); -} + // x0 = pc offset + // x1 = return address in native code + // x2 = interrupt -void emitSetSavedPc(AssemblyBuilderA64& build, int pcpos) -{ - if (pcpos * sizeof(Instruction) <= AssemblyBuilderA64::kMaxImmediate) - { - build.add(x0, rCode, uint16_t(pcpos * sizeof(Instruction))); - } - else - { - build.mov(x0, pcpos * sizeof(Instruction)); - build.add(x0, rCode, x0); - } + // Stash return address in rBase; we need to reload rBase anyway + build.mov(rBase, x1); + // Update savedpc; required in case interrupt errors + build.add(x0, rCode, x0); build.ldr(x1, mem(rState, offsetof(lua_State, ci))); build.str(x0, mem(x1, offsetof(CallInfo, savedpc))); -} - -void emitInterrupt(AssemblyBuilderA64& build, int pcpos) -{ - Label skip; - - build.ldr(x2, mem(rState, offsetof(lua_State, global))); - build.ldr(x2, mem(x2, offsetof(global_State, cb.interrupt))); - build.cbz(x2, skip); - - emitSetSavedPc(build, pcpos + 1); // uses x0/x1 // Call interrupt - // TODO: This code should be outlined so that it can be shared by multiple interruptible instructions build.mov(x0, rState); build.mov(w1, -1); build.blr(x2); // Check if we need to exit + Label skip; build.ldrb(w0, 
mem(rState, offsetof(lua_State, status))); build.cbz(w0, skip); // L->ci->savedpc-- - build.ldr(x0, mem(rState, offsetof(lua_State, ci))); - build.ldr(x1, mem(x0, offsetof(CallInfo, savedpc))); - build.sub(x1, x1, sizeof(Instruction)); - build.str(x1, mem(x0, offsetof(CallInfo, savedpc))); + // note: recomputing this avoids having to stash x0 + build.ldr(x1, mem(rState, offsetof(lua_State, ci))); + build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc))); + build.sub(x0, x0, sizeof(Instruction)); + build.str(x0, mem(x1, offsetof(CallInfo, savedpc))); emitExit(build, /* continueInVm */ false); build.setLabel(skip); + + // Return back to caller; rBase has stashed return address + build.mov(x0, rBase); + + emitUpdateBase(build); // interrupt may have reallocated stack + + build.br(x0); +} + +void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers) +{ + // x0 = closure object to reentry (equal to clvalue(L->ci->func)) + + // If the fallback requested an exit, we need to do this right away + build.cbz(x0, helpers.exitNoContinueVm); + + emitUpdateBase(build); + + // Need to update state of the current function before we jump away + build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto + + build.mov(rClosure, x0); + build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k + build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code + + // Get instruction index from instruction pointer + // To get instruction index from instruction pointer, we need to divide byte offset by 4 + // But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out + build.ldr(x2, mem(rState, offsetof(lua_State, ci))); // L->ci + build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // L->ci->savedpc + build.sub(x2, x2, rCode); + build.add(x2, x2, x2); // TODO: this would not be necessary if we supported shifted register offsets in loads + + // We need to check if the new function can be executed natively + // TODO: This can 
be done earlier in the function flow, to reduce the JIT->VM transition penalty + build.ldr(x1, mem(x1, offsetofProtoExecData)); + build.cbz(x1, helpers.exitContinueVm); + + // Get new instruction location and jump to it + build.ldr(x1, mem(x1, offsetof(NativeProto, instTargets))); + build.ldr(x1, mem(x1, x2)); + build.br(x1); } } // namespace A64 diff --git a/CodeGen/src/EmitCommonA64.h b/CodeGen/src/EmitCommonA64.h index 251f6a35..5ca9c558 100644 --- a/CodeGen/src/EmitCommonA64.h +++ b/CodeGen/src/EmitCommonA64.h @@ -11,7 +11,7 @@ // AArch64 ABI reminder: // Arguments: x0-x7, v0-v7 // Return: x0, v0 (or x8 that points to the address of the resulting structure) -// Volatile: x9-x14, v16-v31 ("caller-saved", any call may change them) +// Volatile: x9-x15, v16-v31 ("caller-saved", any call may change them) // Non-volatile: x19-x28, v8-v15 ("callee-saved", preserved after calls, only bottom half of SIMD registers is preserved!) // Reserved: x16-x18: reserved for linker/platform use; x29: frame pointer (unless omitted); x30: link register; x31: stack pointer @@ -25,52 +25,27 @@ struct NativeState; namespace A64 { -// Data that is very common to access is placed in non-volatile registers +// Data that is very common to access is placed in non-volatile registers: +// 1. Constant registers (only loaded during codegen entry) constexpr RegisterA64 rState = x19; // lua_State* L -constexpr RegisterA64 rBase = x20; // StkId base -constexpr RegisterA64 rNativeContext = x21; // NativeContext* context -constexpr RegisterA64 rConstants = x22; // TValue* k -constexpr RegisterA64 rClosure = x23; // Closure* cl -constexpr RegisterA64 rCode = x24; // Instruction* code +constexpr RegisterA64 rNativeContext = x20; // NativeContext* context + +// 2. 
Frame registers (reloaded when call frame changes; rBase is also reloaded after all calls that may reallocate stack) +constexpr RegisterA64 rConstants = x21; // TValue* k +constexpr RegisterA64 rClosure = x22; // Closure* cl +constexpr RegisterA64 rCode = x23; // Instruction* code +constexpr RegisterA64 rBase = x24; // StkId base // Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point // See CodeGenA64.cpp for layout constexpr unsigned kStackSize = 64; // 8 stashed registers -inline AddressA64 luauReg(int ri) -{ - return mem(rBase, ri * sizeof(TValue)); -} - -inline AddressA64 luauRegValue(int ri) -{ - return mem(rBase, ri * sizeof(TValue) + offsetof(TValue, value)); -} - -inline AddressA64 luauRegTag(int ri) -{ - return mem(rBase, ri * sizeof(TValue) + offsetof(TValue, tt)); -} - -inline AddressA64 luauConstant(int ki) -{ - return mem(rConstants, ki * sizeof(TValue)); -} - -inline AddressA64 luauConstantTag(int ki) -{ - return mem(rConstants, ki * sizeof(TValue) + offsetof(TValue, tt)); -} - -inline AddressA64 luauConstantValue(int ki) -{ - return mem(rConstants, ki * sizeof(TValue) + offsetof(TValue, value)); -} - -void emitExit(AssemblyBuilderA64& build, bool continueInVm); void emitUpdateBase(AssemblyBuilderA64& build); -void emitSetSavedPc(AssemblyBuilderA64& build, int pcpos); // invalidates x0/x1 -void emitInterrupt(AssemblyBuilderA64& build, int pcpos); + +// TODO: Move these to CodeGenA64 so that they can't be accidentally called during lowering +void emitExit(AssemblyBuilderA64& build, bool continueInVm); +void emitInterrupt(AssemblyBuilderA64& build); +void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers); } // namespace A64 } // namespace CodeGen diff --git a/CodeGen/src/EmitCommonX64.cpp b/CodeGen/src/EmitCommonX64.cpp index e9cfdc48..7db4068d 100644 --- a/CodeGen/src/EmitCommonX64.cpp +++ b/CodeGen/src/EmitCommonX64.cpp @@ -2,7 +2,9 @@ #include "EmitCommonX64.h" 
#include "Luau/AssemblyBuilderX64.h" +#include "Luau/IrCallWrapperX64.h" #include "Luau/IrData.h" +#include "Luau/IrRegAllocX64.h" #include "CustomExecUtils.h" #include "NativeState.h" @@ -64,18 +66,19 @@ void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, } } -void jumpOnAnyCmpFallback(AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label) +void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label) { - build.mov(rArg1, rState); - build.lea(rArg2, luauRegAddress(ra)); - build.lea(rArg3, luauRegAddress(rb)); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, luauRegAddress(ra)); + callWrap.addArgument(SizeX64::qword, luauRegAddress(rb)); if (cond == IrCondition::NotLessEqual || cond == IrCondition::LessEqual) - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]); else if (cond == IrCondition::NotLess || cond == IrCondition::Less) - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]); else if (cond == IrCondition::NotEqual || cond == IrCondition::Equal) - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]); else LUAU_ASSERT(!"Unsupported condition"); @@ -119,68 +122,66 @@ void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, Regi build.jcc(ConditionX64::NotZero, label); } -void callArithHelper(AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm) +void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm) { - if (build.abi == ABIX64::Windows) - build.mov(sArg5, tm); - else - build.mov(rArg5, 
tm); - - build.mov(rArg1, rState); - build.lea(rArg2, luauRegAddress(ra)); - build.lea(rArg3, luauRegAddress(rb)); - build.lea(rArg4, c); - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarith)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, luauRegAddress(ra)); + callWrap.addArgument(SizeX64::qword, luauRegAddress(rb)); + callWrap.addArgument(SizeX64::qword, c); + callWrap.addArgument(SizeX64::dword, tm); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarith)]); emitUpdateBase(build); } -void callLengthHelper(AssemblyBuilderX64& build, int ra, int rb) +void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb) { - build.mov(rArg1, rState); - build.lea(rArg2, luauRegAddress(ra)); - build.lea(rArg3, luauRegAddress(rb)); - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, luauRegAddress(ra)); + callWrap.addArgument(SizeX64::qword, luauRegAddress(rb)); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]); emitUpdateBase(build); } -void callPrepareForN(AssemblyBuilderX64& build, int limit, int step, int init) +void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init) { - build.mov(rArg1, rState); - build.lea(rArg2, luauRegAddress(limit)); - build.lea(rArg3, luauRegAddress(step)); - build.lea(rArg4, luauRegAddress(init)); - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_prepareFORN)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, luauRegAddress(limit)); + callWrap.addArgument(SizeX64::qword, luauRegAddress(step)); + callWrap.addArgument(SizeX64::qword, luauRegAddress(init)); + 
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_prepareFORN)]); } -void callGetTable(AssemblyBuilderX64& build, int rb, OperandX64 c, int ra) +void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra) { - build.mov(rArg1, rState); - build.lea(rArg2, luauRegAddress(rb)); - build.lea(rArg3, c); - build.lea(rArg4, luauRegAddress(ra)); - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, luauRegAddress(rb)); + callWrap.addArgument(SizeX64::qword, c); + callWrap.addArgument(SizeX64::qword, luauRegAddress(ra)); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]); emitUpdateBase(build); } -void callSetTable(AssemblyBuilderX64& build, int rb, OperandX64 c, int ra) +void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra) { - build.mov(rArg1, rState); - build.lea(rArg2, luauRegAddress(rb)); - build.lea(rArg3, c); - build.lea(rArg4, luauRegAddress(ra)); - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, luauRegAddress(rb)); + callWrap.addArgument(SizeX64::qword, c); + callWrap.addArgument(SizeX64::qword, luauRegAddress(ra)); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]); emitUpdateBase(build); } -// works for luaC_barriertable, luaC_barrierf -static void callBarrierImpl(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip, int contextOffset) +void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip) { - LUAU_ASSERT(tmp != object); - // iscollectable(ra) build.cmp(luauRegTag(ra), LUA_TSTRING); build.jcc(ConditionX64::Less, 
skip); @@ -193,88 +194,52 @@ static void callBarrierImpl(AssemblyBuilderX64& build, RegisterX64 tmp, Register build.mov(tmp, luauRegValue(ra)); build.test(byte[tmp + offsetof(GCheader, marked)], bit2mask(WHITE0BIT, WHITE1BIT)); build.jcc(ConditionX64::Zero, skip); - - // TODO: even with re-ordering we have a chance of failure, we have a task to fix this in the future - if (object == rArg3) - { - LUAU_ASSERT(tmp != rArg2); - - if (rArg2 != object) - build.mov(rArg2, object); - - if (rArg3 != tmp) - build.mov(rArg3, tmp); - } - else - { - if (rArg3 != tmp) - build.mov(rArg3, tmp); - - if (rArg2 != object) - build.mov(rArg2, object); - } - - build.mov(rArg1, rState); - build.call(qword[rNativeContext + contextOffset]); } -void callBarrierTable(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 table, int ra, Label& skip) +void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra, Label& skip) { - callBarrierImpl(build, tmp, table, ra, skip, offsetof(NativeContext, luaC_barriertable)); + ScopedRegX64 tmp{regs, SizeX64::qword}; + checkObjectBarrierConditions(build, tmp.reg, object, ra, skip); + + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, object, objectOp); + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierf)]); } -void callBarrierObject(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip) -{ - callBarrierImpl(build, tmp, object, ra, skip, offsetof(NativeContext, luaC_barrierf)); -} - -void callBarrierTableFast(AssemblyBuilderX64& build, RegisterX64 table, Label& skip) +void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp, Label& skip) { // isblack(obj2gco(t)) build.test(byte[table + offsetof(GCheader, marked)], bitmask(BLACKBIT)); build.jcc(ConditionX64::Zero, skip); - // 
Argument setup re-ordered to avoid conflicts with table register - if (table != rArg2) - build.mov(rArg2, table); - build.lea(rArg3, addr[rArg2 + offsetof(Table, gclist)]); - build.mov(rArg1, rState); - build.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, table, tableOp); + callWrap.addArgument(SizeX64::qword, addr[table + offsetof(Table, gclist)]); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]); } -void callCheckGc(AssemblyBuilderX64& build, int pcpos, bool savepc, Label& skip) +void callCheckGc(IrRegAllocX64& regs, AssemblyBuilderX64& build, Label& skip) { - build.mov(rax, qword[rState + offsetof(lua_State, global)]); - build.mov(rdx, qword[rax + offsetof(global_State, totalbytes)]); - build.cmp(rdx, qword[rax + offsetof(global_State, GCthreshold)]); - build.jcc(ConditionX64::Below, skip); + { + ScopedRegX64 tmp1{regs, SizeX64::qword}; + ScopedRegX64 tmp2{regs, SizeX64::qword}; - if (savepc) - emitSetSavedPc(build, pcpos + 1); - - build.mov(rArg1, rState); - build.mov(dwordReg(rArg2), 1); - build.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]); + build.mov(tmp1.reg, qword[rState + offsetof(lua_State, global)]); + build.mov(tmp2.reg, qword[tmp1.reg + offsetof(global_State, totalbytes)]); + build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(global_State, GCthreshold)]); + build.jcc(ConditionX64::Below, skip); + } + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::dword, 1); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]); emitUpdateBase(build); } -void callGetFastTmOrFallback(AssemblyBuilderX64& build, RegisterX64 table, TMS tm, Label& fallback) -{ - build.mov(rArg1, qword[table + offsetof(Table, metatable)]); - build.test(rArg1, rArg1); - build.jcc(ConditionX64::Zero, 
fallback); // no metatable - - build.test(byte[rArg1 + offsetof(Table, tmcache)], 1 << tm); - build.jcc(ConditionX64::NotZero, fallback); // no tag method - - // rArg1 is already prepared - build.mov(rArg2, tm); - build.mov(rax, qword[rState + offsetof(lua_State, global)]); - build.mov(rArg3, qword[rax + offsetof(global_State, tmname) + tm * sizeof(TString*)]); - build.call(qword[rNativeContext + offsetof(NativeContext, luaT_gettm)]); -} - void emitExit(AssemblyBuilderX64& build, bool continueInVm) { if (continueInVm) @@ -317,6 +282,8 @@ void emitInterrupt(AssemblyBuilderX64& build, int pcpos) build.mov(dwordReg(rArg2), -1); // function accepts 'int' here and using qword reg would've forced 8 byte constant here build.call(r8); + emitUpdateBase(build); // interrupt may have reallocated stack + // Check if we need to exit build.mov(al, byte[rState + offsetof(lua_State, status)]); build.test(al, al); diff --git a/CodeGen/src/EmitCommonX64.h b/CodeGen/src/EmitCommonX64.h index 6b676255..85045ad5 100644 --- a/CodeGen/src/EmitCommonX64.h +++ b/CodeGen/src/EmitCommonX64.h @@ -27,10 +27,13 @@ namespace CodeGen enum class IrCondition : uint8_t; struct NativeState; +struct IrOp; namespace X64 { +struct IrRegAllocX64; + // Data that is very common to access is placed in non-volatile registers constexpr RegisterX64 rState = r15; // lua_State* L constexpr RegisterX64 rBase = r14; // StkId base @@ -233,21 +236,20 @@ inline void jumpIfNodeKeyNotInExpectedSlot(AssemblyBuilderX64& build, RegisterX6 } void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label); -void jumpOnAnyCmpFallback(AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label); +void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label); void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos); void 
convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label); -void callArithHelper(AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm); -void callLengthHelper(AssemblyBuilderX64& build, int ra, int rb); -void callPrepareForN(AssemblyBuilderX64& build, int limit, int step, int init); -void callGetTable(AssemblyBuilderX64& build, int rb, OperandX64 c, int ra); -void callSetTable(AssemblyBuilderX64& build, int rb, OperandX64 c, int ra); -void callBarrierTable(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 table, int ra, Label& skip); -void callBarrierObject(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip); -void callBarrierTableFast(AssemblyBuilderX64& build, RegisterX64 table, Label& skip); -void callCheckGc(AssemblyBuilderX64& build, int pcpos, bool savepc, Label& skip); -void callGetFastTmOrFallback(AssemblyBuilderX64& build, RegisterX64 table, TMS tm, Label& fallback); +void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm); +void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb); +void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init); +void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra); +void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra); +void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip); +void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra, Label& skip); +void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp, Label& skip); +void callCheckGc(IrRegAllocX64& regs, AssemblyBuilderX64& build, Label& skip); void emitExit(AssemblyBuilderX64& build, bool 
continueInVm); void emitUpdateBase(AssemblyBuilderX64& build); diff --git a/CodeGen/src/EmitInstructionA64.cpp b/CodeGen/src/EmitInstructionA64.cpp index 8289ee2e..400ba77e 100644 --- a/CodeGen/src/EmitInstructionA64.cpp +++ b/CodeGen/src/EmitInstructionA64.cpp @@ -23,35 +23,50 @@ void emitInstReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, i build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, returnFallback))); build.blr(x3); + // reentry with x0=closure (NULL will trigger exit) + build.b(helpers.reentry); +} + +void emitInstCall(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults) +{ + // argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams; + if (nparams == LUA_MULTRET) + build.ldr(x2, mem(rState, offsetof(lua_State, top))); + else + build.add(x2, rBase, uint16_t((ra + 1 + nparams) * sizeof(TValue))); + + // callFallback(L, ra, argtop, nresults) + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(ra * sizeof(TValue))); + build.mov(w3, nresults); + build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback))); + build.blr(x4); + + // reentry with x0=closure (NULL will trigger exit) + build.b(helpers.reentry); +} + +void emitInstGetImport(AssemblyBuilderA64& build, int ra, uint32_t aux) +{ + // luaV_getimport(L, cl->env, k, aux, /* propagatenil= */ false) + build.mov(x0, rState); + build.ldr(x1, mem(rClosure, offsetof(Closure, env))); + build.mov(x2, rConstants); + build.mov(w3, aux); + build.mov(w4, 0); + build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_getimport))); + build.blr(x5); + emitUpdateBase(build); - // If the fallback requested an exit, we need to do this right away - build.cbz(x0, helpers.exitNoContinueVm); + // setobj2s(L, ra, L->top - 1) + build.ldr(x0, mem(rState, offsetof(lua_State, top))); + build.sub(x0, x0, sizeof(TValue)); + build.ldr(q0, x0); + build.str(q0, mem(rBase, ra * sizeof(TValue))); - // Need to update state of the current function before 
we jump away - build.ldr(x1, mem(rState, offsetof(lua_State, ci))); // L->ci - build.ldr(x1, mem(x1, offsetof(CallInfo, func))); // L->ci->func - build.ldr(rClosure, mem(x1, offsetof(TValue, value.gc))); // L->ci->func->value.gc aka cl - - build.ldr(x1, mem(rClosure, offsetof(Closure, l.p))); // cl->l.p aka proto - - build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k - build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code - - // Get instruction index from instruction pointer - // To get instruction index from instruction pointer, we need to divide byte offset by 4 - // But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out - build.sub(x2, x0, rCode); - build.add(x2, x2, x2); // TODO: this would not be necessary if we supported shifted register offsets in loads - - // We need to check if the new function can be executed natively - build.ldr(x1, mem(x1, offsetofProtoExecData)); - build.cbz(x1, helpers.exitContinueVm); - - // Get new instruction location and jump to it - build.ldr(x1, mem(x1, offsetof(NativeProto, instTargets))); - build.ldr(x1, mem(x1, x2)); - build.br(x1); + // L->top-- + build.str(x0, mem(rState, offsetof(lua_State, top))); } } // namespace A64 diff --git a/CodeGen/src/EmitInstructionA64.h b/CodeGen/src/EmitInstructionA64.h index 7f15d819..278d8e8e 100644 --- a/CodeGen/src/EmitInstructionA64.h +++ b/CodeGen/src/EmitInstructionA64.h @@ -1,6 +1,8 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details #pragma once +#include + namespace Luau { namespace CodeGen @@ -14,6 +16,8 @@ namespace A64 class AssemblyBuilderA64; void emitInstReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int n); +void emitInstCall(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults); +void emitInstGetImport(AssemblyBuilderA64& build, int ra, uint32_t aux); } // namespace A64 } // namespace 
CodeGen diff --git a/CodeGen/src/EmitInstructionX64.cpp b/CodeGen/src/EmitInstructionX64.cpp index 649498f5..b645f9f7 100644 --- a/CodeGen/src/EmitInstructionX64.cpp +++ b/CodeGen/src/EmitInstructionX64.cpp @@ -2,6 +2,7 @@ #include "EmitInstructionX64.h" #include "Luau/AssemblyBuilderX64.h" +#include "Luau/IrRegAllocX64.h" #include "CustomExecUtils.h" #include "EmitCommonX64.h" @@ -315,7 +316,7 @@ void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, i build.jmp(qword[rdx + rax * 2]); } -void emitInstSetList(AssemblyBuilderX64& build, Label& next, int ra, int rb, int count, uint32_t index) +void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, Label& next, int ra, int rb, int count, uint32_t index) { OperandX64 last = index + count - 1; @@ -346,7 +347,7 @@ void emitInstSetList(AssemblyBuilderX64& build, Label& next, int ra, int rb, int Label skipResize; - RegisterX64 table = rax; + RegisterX64 table = regs.takeReg(rax); build.mov(table, luauRegValue(ra)); @@ -411,7 +412,7 @@ void emitInstSetList(AssemblyBuilderX64& build, Label& next, int ra, int rb, int build.setLabel(endLoop); } - callBarrierTableFast(build, table, next); + callBarrierTableFast(regs, build, table, {}, next); } void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat, Label& loopExit) @@ -483,10 +484,8 @@ void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRep build.jcc(ConditionX64::NotZero, loopRepeat); } -void emitinstForGLoopFallback(AssemblyBuilderX64& build, int pcpos, int ra, int aux, Label& loopRepeat) +void emitinstForGLoopFallback(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat) { - emitSetSavedPc(build, pcpos + 1); - build.mov(rArg1, rState); build.mov(dwordReg(rArg2), ra); build.mov(dwordReg(rArg3), aux); diff --git a/CodeGen/src/EmitInstructionX64.h b/CodeGen/src/EmitInstructionX64.h index 880c9fa4..cc1b8645 100644 --- a/CodeGen/src/EmitInstructionX64.h +++ 
b/CodeGen/src/EmitInstructionX64.h @@ -15,12 +15,13 @@ namespace X64 { class AssemblyBuilderX64; +struct IrRegAllocX64; void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults); void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults); -void emitInstSetList(AssemblyBuilderX64& build, Label& next, int ra, int rb, int count, uint32_t index); +void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, Label& next, int ra, int rb, int count, uint32_t index); void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat, Label& loopExit); -void emitinstForGLoopFallback(AssemblyBuilderX64& build, int pcpos, int ra, int aux, Label& loopRepeat); +void emitinstForGLoopFallback(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat); void emitInstForGPrepXnextFallback(AssemblyBuilderX64& build, int pcpos, int ra, Label& target); void emitInstAnd(AssemblyBuilderX64& build, int ra, int rb, int rc); void emitInstAndK(AssemblyBuilderX64& build, int ra, int rb, int kc); diff --git a/CodeGen/src/IrAnalysis.cpp b/CodeGen/src/IrAnalysis.cpp index 6e77dfe4..b248b97d 100644 --- a/CodeGen/src/IrAnalysis.cpp +++ b/CodeGen/src/IrAnalysis.cpp @@ -300,17 +300,17 @@ static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock& if (function.boolOp(inst.b)) capturedRegs.set(inst.a.index, true); break; - case IrCmd::LOP_SETLIST: + case IrCmd::SETLIST: use(inst.b); useRange(inst.c.index, function.intOp(inst.d)); break; - case IrCmd::LOP_CALL: + case IrCmd::CALL: use(inst.a); useRange(inst.a.index + 1, function.intOp(inst.b)); defRange(inst.a.index, function.intOp(inst.c)); break; - case IrCmd::LOP_RETURN: + case IrCmd::RETURN: useRange(inst.a.index, function.intOp(inst.b)); break; case IrCmd::FASTCALL: @@ -341,7 +341,7 @@ static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock& if (int count = function.intOp(inst.f); count != 
-1) defRange(inst.b.index, count); break; - case IrCmd::LOP_FORGLOOP: + case IrCmd::FORGLOOP: // First register is not used by instruction, we check that it's still 'nil' with CHECK_TAG use(inst.a, 1); use(inst.a, 2); @@ -349,26 +349,26 @@ static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock& def(inst.a, 2); defRange(inst.a.index + 3, function.intOp(inst.b)); break; - case IrCmd::LOP_FORGLOOP_FALLBACK: - useRange(inst.b.index, 3); + case IrCmd::FORGLOOP_FALLBACK: + useRange(inst.a.index, 3); - def(inst.b, 2); - defRange(inst.b.index + 3, uint8_t(function.intOp(inst.c))); // ignore most significant bit + def(inst.a, 2); + defRange(inst.a.index + 3, uint8_t(function.intOp(inst.b))); // ignore most significant bit break; - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: + case IrCmd::FORGPREP_XNEXT_FALLBACK: use(inst.b); break; // A <- B, C - case IrCmd::LOP_AND: - case IrCmd::LOP_OR: + case IrCmd::AND: + case IrCmd::OR: use(inst.b); use(inst.c); def(inst.a); break; // A <- B - case IrCmd::LOP_ANDK: - case IrCmd::LOP_ORK: + case IrCmd::ANDK: + case IrCmd::ORK: use(inst.b); def(inst.a); diff --git a/CodeGen/src/IrBuilder.cpp b/CodeGen/src/IrBuilder.cpp index 239f7a8e..4fee080b 100644 --- a/CodeGen/src/IrBuilder.cpp +++ b/CodeGen/src/IrBuilder.cpp @@ -135,7 +135,7 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i) inst(IrCmd::INTERRUPT, constUint(i)); inst(IrCmd::SET_SAVEDPC, constUint(i + 1)); - inst(IrCmd::LOP_CALL, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1), constInt(LUAU_INSN_C(*pc) - 1)); + inst(IrCmd::CALL, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1), constInt(LUAU_INSN_C(*pc) - 1)); if (activeFastcallFallback) { @@ -149,7 +149,7 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i) case LOP_RETURN: inst(IrCmd::INTERRUPT, constUint(i)); - inst(IrCmd::LOP_RETURN, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1)); + inst(IrCmd::RETURN, vmReg(LUAU_INSN_A(*pc)), 
constInt(LUAU_INSN_B(*pc) - 1)); break; case LOP_GETTABLE: translateInstGetTable(*this, pc, i); @@ -266,7 +266,7 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i) translateInstDupTable(*this, pc, i); break; case LOP_SETLIST: - inst(IrCmd::LOP_SETLIST, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), constInt(LUAU_INSN_C(*pc) - 1), constUint(pc[1])); + inst(IrCmd::SETLIST, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), constInt(LUAU_INSN_C(*pc) - 1), constUint(pc[1])); break; case LOP_GETUPVAL: translateInstGetUpval(*this, pc, i); @@ -347,10 +347,11 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i) inst(IrCmd::INTERRUPT, constUint(i)); loadAndCheckTag(vmReg(ra), LUA_TNIL, fallback); - inst(IrCmd::LOP_FORGLOOP, vmReg(ra), constInt(aux), loopRepeat, loopExit); + inst(IrCmd::FORGLOOP, vmReg(ra), constInt(aux), loopRepeat, loopExit); beginBlock(fallback); - inst(IrCmd::LOP_FORGLOOP_FALLBACK, constUint(i), vmReg(ra), constInt(aux), loopRepeat, loopExit); + inst(IrCmd::SET_SAVEDPC, constUint(i + 1)); + inst(IrCmd::FORGLOOP_FALLBACK, vmReg(ra), constInt(aux), loopRepeat, loopExit); beginBlock(loopExit); } @@ -363,19 +364,19 @@ void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i) translateInstForGPrepInext(*this, pc, i); break; case LOP_AND: - inst(IrCmd::LOP_AND, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmReg(LUAU_INSN_C(*pc))); + inst(IrCmd::AND, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmReg(LUAU_INSN_C(*pc))); break; case LOP_ANDK: - inst(IrCmd::LOP_ANDK, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmConst(LUAU_INSN_C(*pc))); + inst(IrCmd::ANDK, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmConst(LUAU_INSN_C(*pc))); break; case LOP_OR: - inst(IrCmd::LOP_OR, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmReg(LUAU_INSN_C(*pc))); + inst(IrCmd::OR, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmReg(LUAU_INSN_C(*pc))); break; 
case LOP_ORK: - inst(IrCmd::LOP_ORK, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmConst(LUAU_INSN_C(*pc))); + inst(IrCmd::ORK, vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), vmConst(LUAU_INSN_C(*pc))); break; case LOP_COVERAGE: - inst(IrCmd::LOP_COVERAGE, constUint(i)); + inst(IrCmd::COVERAGE, constUint(i)); break; case LOP_GETIMPORT: translateInstGetImport(*this, pc, i); diff --git a/CodeGen/src/IrCallWrapperX64.cpp b/CodeGen/src/IrCallWrapperX64.cpp new file mode 100644 index 00000000..4f0c0cf6 --- /dev/null +++ b/CodeGen/src/IrCallWrapperX64.cpp @@ -0,0 +1,400 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#include "Luau/IrCallWrapperX64.h" + +#include "Luau/AssemblyBuilderX64.h" +#include "Luau/IrRegAllocX64.h" + +#include "EmitCommonX64.h" + +namespace Luau +{ +namespace CodeGen +{ +namespace X64 +{ + +static bool sameUnderlyingRegister(RegisterX64 a, RegisterX64 b) +{ + SizeX64 underlyingSizeA = a.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword; + SizeX64 underlyingSizeB = b.size == SizeX64::xmmword ? 
SizeX64::xmmword : SizeX64::qword; + + return underlyingSizeA == underlyingSizeB && a.index == b.index; +} + +IrCallWrapperX64::IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx) + : regs(regs) + , build(build) + , instIdx(instIdx) + , funcOp(noreg) +{ + gprUses.fill(0); + xmmUses.fill(0); +} + +void IrCallWrapperX64::addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp) +{ + // Instruction operands rely on current instruction index for lifetime tracking + LUAU_ASSERT(instIdx != kInvalidInstIdx || sourceOp.kind == IrOpKind::None); + + LUAU_ASSERT(argCount < kMaxCallArguments); + args[argCount++] = {targetSize, source, sourceOp}; +} + +void IrCallWrapperX64::addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg) +{ + LUAU_ASSERT(argCount < kMaxCallArguments); + args[argCount++] = {targetSize, scopedReg.release(), {}}; +} + +void IrCallWrapperX64::call(const OperandX64& func) +{ + funcOp = func; + + assignTargetRegisters(); + + countRegisterUses(); + + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + // If source is the last use of IrInst, clear the register + // Source registers are recorded separately in CallArgument + if (arg.sourceOp.kind != IrOpKind::None) + { + if (IrInst* inst = regs.function.asInstOp(arg.sourceOp)) + { + if (regs.isLastUseReg(*inst, instIdx)) + inst->regX64 = noreg; + } + } + + // Immediate values are stored at the end since they are not interfering and target register can still be used temporarily + if (arg.source.cat == CategoryX64::imm) + { + arg.candidate = false; + } + // Arguments passed through stack can be handled immediately + else if (arg.target.cat == CategoryX64::mem) + { + if (arg.source.cat == CategoryX64::mem) + { + ScopedRegX64 tmp{regs, arg.target.memSize}; + + freeSourceRegisters(arg); + + build.mov(tmp.reg, arg.source); + build.mov(arg.target, tmp.reg); + } + else + { + freeSourceRegisters(arg); + + build.mov(arg.target, arg.source); + } + + 
arg.candidate = false; + } + // Skip arguments that are already in their place + else if (arg.source.cat == CategoryX64::reg && sameUnderlyingRegister(arg.target.base, arg.source.base)) + { + freeSourceRegisters(arg); + + // If target is not used as source in other arguments, prevent register allocator from giving it out + if (getRegisterUses(arg.target.base) == 0) + regs.takeReg(arg.target.base); + else // Otherwise, make sure we won't free it when last source use is completed + addRegisterUse(arg.target.base); + + arg.candidate = false; + } + } + + // Repeat until we run out of arguments to pass + while (true) + { + // Find target argument register that is not an active source + if (CallArgument* candidate = findNonInterferingArgument()) + { + // This section is only for handling register targets + LUAU_ASSERT(candidate->target.cat == CategoryX64::reg); + + freeSourceRegisters(*candidate); + + LUAU_ASSERT(getRegisterUses(candidate->target.base) == 0); + regs.takeReg(candidate->target.base); + + moveToTarget(*candidate); + + candidate->candidate = false; + } + // If all registers cross-interfere (rcx <- rdx, rdx <- rcx), one has to be renamed + else if (RegisterX64 conflict = findConflictingTarget(); conflict != noreg) + { + // Get a fresh register + RegisterX64 freshReg = conflict.size == SizeX64::xmmword ? 
regs.allocXmmReg() : regs.allocGprReg(conflict.size); + + if (conflict.size == SizeX64::xmmword) + build.vmovsd(freshReg, conflict, conflict); + else + build.mov(freshReg, conflict); + + renameSourceRegisters(conflict, freshReg); + } + else + { + for (int i = 0; i < argCount; ++i) + LUAU_ASSERT(!args[i].candidate); + break; + } + } + + // Handle immediate arguments last + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + if (arg.source.cat == CategoryX64::imm) + { + if (arg.target.cat == CategoryX64::reg) + regs.takeReg(arg.target.base); + + moveToTarget(arg); + } + } + + // Free registers used in the function call + removeRegisterUse(funcOp.base); + removeRegisterUse(funcOp.index); + + // Just before the call is made, argument registers are all marked as free in register allocator + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + if (arg.target.cat == CategoryX64::reg) + regs.freeReg(arg.target.base); + } + + build.call(funcOp); +} + +void IrCallWrapperX64::assignTargetRegisters() +{ + static const std::array kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]}; + static const std::array kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9}; + + const std::array& gprOrder = build.abi == ABIX64::Windows ? 
kWindowsGprOrder : kSystemvGprOrder; + static const std::array kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV + + int gprPos = 0; + int xmmPos = 0; + + for (int i = 0; i < argCount; i++) + { + CallArgument& arg = args[i]; + + if (arg.targetSize == SizeX64::xmmword) + { + LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size()); + arg.target = kXmmOrder[xmmPos++]; + + if (build.abi == ABIX64::Windows) + gprPos++; // On Windows, gpr/xmm register positions move in sync + } + else + { + LUAU_ASSERT(size_t(gprPos) < gprOrder.size()); + arg.target = gprOrder[gprPos++]; + + if (build.abi == ABIX64::Windows) + xmmPos++; // On Windows, gpr/xmm register positions move in sync + + // Keep requested argument size + if (arg.target.cat == CategoryX64::reg) + arg.target.base.size = arg.targetSize; + else if (arg.target.cat == CategoryX64::mem) + arg.target.memSize = arg.targetSize; + } + } +} + +void IrCallWrapperX64::countRegisterUses() +{ + for (int i = 0; i < argCount; ++i) + { + addRegisterUse(args[i].source.base); + addRegisterUse(args[i].source.index); + } + + addRegisterUse(funcOp.base); + addRegisterUse(funcOp.index); +} + +CallArgument* IrCallWrapperX64::findNonInterferingArgument() +{ + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + if (arg.candidate && !interferesWithActiveSources(arg, i) && !interferesWithOperand(funcOp, arg.target.base)) + return &arg; + } + + return nullptr; +} + +bool IrCallWrapperX64::interferesWithOperand(const OperandX64& op, RegisterX64 reg) const +{ + return sameUnderlyingRegister(op.base, reg) || sameUnderlyingRegister(op.index, reg); +} + +bool IrCallWrapperX64::interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const +{ + for (int i = 0; i < argCount; ++i) + { + const CallArgument& arg = args[i]; + + if (arg.candidate && i != targetArgIndex && interferesWithOperand(arg.source, targetArg.target.base)) + return true; + } + + return false; +} + 
+bool IrCallWrapperX64::interferesWithActiveTarget(RegisterX64 sourceReg) const +{ + for (int i = 0; i < argCount; ++i) + { + const CallArgument& arg = args[i]; + + if (arg.candidate && sameUnderlyingRegister(arg.target.base, sourceReg)) + return true; + } + + return false; +} + +void IrCallWrapperX64::moveToTarget(CallArgument& arg) +{ + if (arg.source.cat == CategoryX64::reg) + { + RegisterX64 source = arg.source.base; + + if (source.size == SizeX64::xmmword) + build.vmovsd(arg.target, source, source); + else + build.mov(arg.target, source); + } + else if (arg.source.cat == CategoryX64::imm) + { + build.mov(arg.target, arg.source); + } + else + { + if (arg.source.memSize == SizeX64::none) + build.lea(arg.target, arg.source); + else if (arg.target.base.size == SizeX64::xmmword && arg.source.memSize == SizeX64::xmmword) + build.vmovups(arg.target, arg.source); + else if (arg.target.base.size == SizeX64::xmmword) + build.vmovsd(arg.target, arg.source); + else + build.mov(arg.target, arg.source); + } +} + +void IrCallWrapperX64::freeSourceRegisters(CallArgument& arg) +{ + removeRegisterUse(arg.source.base); + removeRegisterUse(arg.source.index); +} + +void IrCallWrapperX64::renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement) +{ + if (sameUnderlyingRegister(target, reg)) + { + addRegisterUse(replacement); + removeRegisterUse(target); + + target.index = replacement.index; // Only change index, size is preserved + } +} + +void IrCallWrapperX64::renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement) +{ + for (int i = 0; i < argCount; ++i) + { + CallArgument& arg = args[i]; + + if (arg.candidate) + { + renameRegister(arg.source.base, reg, replacement); + renameRegister(arg.source.index, reg, replacement); + } + } + + renameRegister(funcOp.base, reg, replacement); + renameRegister(funcOp.index, reg, replacement); +} + +RegisterX64 IrCallWrapperX64::findConflictingTarget() const +{ + for (int i = 0; i < argCount; ++i) + { + const 
CallArgument& arg = args[i]; + + if (arg.candidate) + { + if (interferesWithActiveTarget(arg.source.base)) + return arg.source.base; + + if (interferesWithActiveTarget(arg.source.index)) + return arg.source.index; + } + } + + if (interferesWithActiveTarget(funcOp.base)) + return funcOp.base; + + if (interferesWithActiveTarget(funcOp.index)) + return funcOp.index; + + return noreg; +} + +int IrCallWrapperX64::getRegisterUses(RegisterX64 reg) const +{ + return reg.size == SizeX64::xmmword ? xmmUses[reg.index] : (reg.size != SizeX64::none ? gprUses[reg.index] : 0); +} + +void IrCallWrapperX64::addRegisterUse(RegisterX64 reg) +{ + if (reg.size == SizeX64::xmmword) + xmmUses[reg.index]++; + else if (reg.size != SizeX64::none) + gprUses[reg.index]++; +} + +void IrCallWrapperX64::removeRegisterUse(RegisterX64 reg) +{ + if (reg.size == SizeX64::xmmword) + { + LUAU_ASSERT(xmmUses[reg.index] != 0); + xmmUses[reg.index]--; + + if (xmmUses[reg.index] == 0) // we don't use persistent xmm regs so no need to call shouldFreeRegister + regs.freeReg(reg); + } + else if (reg.size != SizeX64::none) + { + LUAU_ASSERT(gprUses[reg.index] != 0); + gprUses[reg.index]--; + + if (gprUses[reg.index] == 0 && regs.shouldFreeGpr(reg)) + regs.freeReg(reg); + } +} + +} // namespace X64 +} // namespace CodeGen +} // namespace Luau diff --git a/CodeGen/src/IrDump.cpp b/CodeGen/src/IrDump.cpp index 53654d6a..fb56df8c 100644 --- a/CodeGen/src/IrDump.cpp +++ b/CodeGen/src/IrDump.cpp @@ -126,6 +126,16 @@ const char* getCmdName(IrCmd cmd) return "MAX_NUM"; case IrCmd::UNM_NUM: return "UNM_NUM"; + case IrCmd::FLOOR_NUM: + return "FLOOR_NUM"; + case IrCmd::CEIL_NUM: + return "CEIL_NUM"; + case IrCmd::ROUND_NUM: + return "ROUND_NUM"; + case IrCmd::SQRT_NUM: + return "SQRT_NUM"; + case IrCmd::ABS_NUM: + return "ABS_NUM"; case IrCmd::NOT_ANY: return "NOT_ANY"; case IrCmd::JUMP: @@ -216,28 +226,28 @@ const char* getCmdName(IrCmd cmd) return "CLOSE_UPVALS"; case IrCmd::CAPTURE: return "CAPTURE"; - case 
IrCmd::LOP_SETLIST: - return "LOP_SETLIST"; - case IrCmd::LOP_CALL: - return "LOP_CALL"; - case IrCmd::LOP_RETURN: - return "LOP_RETURN"; - case IrCmd::LOP_FORGLOOP: - return "LOP_FORGLOOP"; - case IrCmd::LOP_FORGLOOP_FALLBACK: - return "LOP_FORGLOOP_FALLBACK"; - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: - return "LOP_FORGPREP_XNEXT_FALLBACK"; - case IrCmd::LOP_AND: - return "LOP_AND"; - case IrCmd::LOP_ANDK: - return "LOP_ANDK"; - case IrCmd::LOP_OR: - return "LOP_OR"; - case IrCmd::LOP_ORK: - return "LOP_ORK"; - case IrCmd::LOP_COVERAGE: - return "LOP_COVERAGE"; + case IrCmd::SETLIST: + return "SETLIST"; + case IrCmd::CALL: + return "CALL"; + case IrCmd::RETURN: + return "RETURN"; + case IrCmd::FORGLOOP: + return "FORGLOOP"; + case IrCmd::FORGLOOP_FALLBACK: + return "FORGLOOP_FALLBACK"; + case IrCmd::FORGPREP_XNEXT_FALLBACK: + return "FORGPREP_XNEXT_FALLBACK"; + case IrCmd::AND: + return "AND"; + case IrCmd::ANDK: + return "ANDK"; + case IrCmd::OR: + return "OR"; + case IrCmd::ORK: + return "ORK"; + case IrCmd::COVERAGE: + return "COVERAGE"; case IrCmd::FALLBACK_GETGLOBAL: return "FALLBACK_GETGLOBAL"; case IrCmd::FALLBACK_SETGLOBAL: diff --git a/CodeGen/src/IrLoweringA64.cpp b/CodeGen/src/IrLoweringA64.cpp index ae4bc017..37f38157 100644 --- a/CodeGen/src/IrLoweringA64.cpp +++ b/CodeGen/src/IrLoweringA64.cpp @@ -13,6 +13,9 @@ #include "lstate.h" +// TODO: Eventually this can go away +// #define TRACE + namespace Luau { namespace CodeGen @@ -20,12 +23,67 @@ namespace CodeGen namespace A64 { +#ifdef TRACE +struct LoweringStatsA64 +{ + size_t can; + size_t total; + + ~LoweringStatsA64() + { + if (total) + printf("A64 lowering succeded for %.1f%% functions (%d/%d)\n", double(can) / double(total) * 100, int(can), int(total)); + } +} gStatsA64; +#endif + +inline ConditionA64 getConditionFP(IrCondition cond) +{ + switch (cond) + { + case IrCondition::Equal: + return ConditionA64::Equal; + + case IrCondition::NotEqual: + return ConditionA64::NotEqual; + + case 
IrCondition::Less: + return ConditionA64::Minus; + + case IrCondition::NotLess: + return ConditionA64::Plus; + + case IrCondition::LessEqual: + return ConditionA64::UnsignedLessEqual; + + case IrCondition::NotLessEqual: + return ConditionA64::UnsignedGreater; + + case IrCondition::Greater: + return ConditionA64::Greater; + + case IrCondition::NotGreater: + return ConditionA64::LessEqual; + + case IrCondition::GreaterEqual: + return ConditionA64::GreaterEqual; + + case IrCondition::NotGreaterEqual: + return ConditionA64::Less; + + default: + LUAU_ASSERT(!"Unexpected condition code"); + return ConditionA64::Always; + } +} + IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function) : build(build) , helpers(helpers) , data(data) , proto(proto) , function(function) + , regs(function, {{x0, x15}, {q0, q7}, {q16, q31}}) { // In order to allocate registers during lowering, we need to know where instruction results are last used updateLastUseLocations(function); @@ -34,20 +92,61 @@ IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, // TODO: Eventually this can go away bool IrLoweringA64::canLower(const IrFunction& function) { +#ifdef TRACE + gStatsA64.total++; +#endif + for (const IrInst& inst : function.instructions) { switch (inst.cmd) { case IrCmd::NOP: - case IrCmd::SUBSTITUTE: + case IrCmd::LOAD_TAG: + case IrCmd::LOAD_POINTER: + case IrCmd::LOAD_DOUBLE: + case IrCmd::LOAD_INT: + case IrCmd::LOAD_TVALUE: + case IrCmd::LOAD_NODE_VALUE_TV: + case IrCmd::LOAD_ENV: + case IrCmd::STORE_TAG: + case IrCmd::STORE_POINTER: + case IrCmd::STORE_DOUBLE: + case IrCmd::STORE_INT: + case IrCmd::STORE_TVALUE: + case IrCmd::STORE_NODE_VALUE_TV: + case IrCmd::ADD_NUM: + case IrCmd::SUB_NUM: + case IrCmd::MUL_NUM: + case IrCmd::DIV_NUM: + case IrCmd::MOD_NUM: + case IrCmd::UNM_NUM: + case IrCmd::JUMP: + case IrCmd::JUMP_EQ_TAG: + case IrCmd::JUMP_CMP_NUM: + case IrCmd::JUMP_CMP_ANY: 
+ case IrCmd::DO_ARITH: + case IrCmd::GET_IMPORT: + case IrCmd::GET_UPVALUE: + case IrCmd::CHECK_TAG: + case IrCmd::CHECK_READONLY: + case IrCmd::CHECK_NO_METATABLE: + case IrCmd::CHECK_SAFE_ENV: case IrCmd::INTERRUPT: - case IrCmd::LOP_RETURN: + case IrCmd::SET_SAVEDPC: + case IrCmd::CALL: + case IrCmd::RETURN: + case IrCmd::SUBSTITUTE: continue; + default: return false; } } +#ifdef TRACE + gStatsA64.can++; +#endif + return true; } @@ -55,23 +154,338 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) { switch (inst.cmd) { + case IrCmd::LOAD_TAG: + { + inst.regA64 = regs.allocReg(KindA64::w); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt)); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_POINTER: + { + inst.regA64 = regs.allocReg(KindA64::x); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_DOUBLE: + { + inst.regA64 = regs.allocReg(KindA64::d); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_INT: + { + inst.regA64 = regs.allocReg(KindA64::w); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_TVALUE: + { + inst.regA64 = regs.allocReg(KindA64::q); + AddressA64 addr = tempAddr(inst.a, 0); + build.ldr(inst.regA64, addr); + break; + } + case IrCmd::LOAD_NODE_VALUE_TV: + { + inst.regA64 = regs.allocReg(KindA64::q); + build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(LuaNode, val))); + break; + } + case IrCmd::LOAD_ENV: + inst.regA64 = regs.allocReg(KindA64::x); + build.ldr(inst.regA64, mem(rClosure, offsetof(Closure, env))); + break; + case IrCmd::STORE_TAG: + { + RegisterA64 temp = regs.allocTemp(KindA64::w); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt)); + build.mov(temp, tagOp(inst.b)); + build.str(temp, addr); + break; + } + case IrCmd::STORE_POINTER: + { + AddressA64 
addr = tempAddr(inst.a, offsetof(TValue, value)); + build.str(regOp(inst.b), addr); + break; + } + case IrCmd::STORE_DOUBLE: + { + RegisterA64 temp = tempDouble(inst.b); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.str(temp, addr); + break; + } + case IrCmd::STORE_INT: + { + RegisterA64 temp = tempInt(inst.b); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + build.str(temp, addr); + break; + } + case IrCmd::STORE_TVALUE: + { + AddressA64 addr = tempAddr(inst.a, 0); + build.str(regOp(inst.b), addr); + break; + } + case IrCmd::STORE_NODE_VALUE_TV: + build.str(regOp(inst.b), mem(regOp(inst.a), offsetof(LuaNode, val))); + break; + case IrCmd::ADD_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fadd(inst.regA64, temp1, temp2); + break; + } + case IrCmd::SUB_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fsub(inst.regA64, temp1, temp2); + break; + } + case IrCmd::MUL_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fmul(inst.regA64, temp1, temp2); + break; + } + case IrCmd::DIV_NUM: + { + inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fdiv(inst.regA64, temp1, temp2); + break; + } + case IrCmd::MOD_NUM: + { + inst.regA64 = regs.allocReg(KindA64::d); + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + build.fdiv(inst.regA64, temp1, temp2); + build.frintm(inst.regA64, inst.regA64); + build.fmul(inst.regA64, inst.regA64, temp2); + build.fsub(inst.regA64, temp1, inst.regA64); + break; + } + case IrCmd::UNM_NUM: + { + inst.regA64 
= regs.allocReuse(KindA64::d, index, {inst.a}); + RegisterA64 temp = tempDouble(inst.a); + build.fneg(inst.regA64, temp); + break; + } + case IrCmd::JUMP: + jumpOrFallthrough(blockOp(inst.a), next); + break; + case IrCmd::JUMP_EQ_TAG: + if (inst.b.kind == IrOpKind::Constant) + build.cmp(regOp(inst.a), tagOp(inst.b)); + else if (inst.b.kind == IrOpKind::Inst) + build.cmp(regOp(inst.a), regOp(inst.b)); + else + LUAU_ASSERT(!"Unsupported instruction form"); + + if (isFallthroughBlock(blockOp(inst.d), next)) + { + build.b(ConditionA64::Equal, labelOp(inst.c)); + jumpOrFallthrough(blockOp(inst.d), next); + } + else + { + build.b(ConditionA64::NotEqual, labelOp(inst.d)); + jumpOrFallthrough(blockOp(inst.c), next); + } + break; + case IrCmd::JUMP_CMP_NUM: + { + IrCondition cond = conditionOp(inst.c); + + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + + build.fcmp(temp1, temp2); + build.b(getConditionFP(cond), labelOp(inst.d)); + jumpOrFallthrough(blockOp(inst.e), next); + break; + } + case IrCmd::JUMP_CMP_ANY: + { + IrCondition cond = conditionOp(inst.c); + + regs.assertAllFree(); + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); + build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); + + if (cond == IrCondition::NotLessEqual || cond == IrCondition::LessEqual) + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessequal))); + else if (cond == IrCondition::NotLess || cond == IrCondition::Less) + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessthan))); + else if (cond == IrCondition::NotEqual || cond == IrCondition::Equal) + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_equalval))); + else + LUAU_ASSERT(!"Unsupported condition"); + + build.blr(x3); + + emitUpdateBase(build); + + if (cond == IrCondition::NotLessEqual || cond == IrCondition::NotLess || cond == IrCondition::NotEqual) + build.cbz(x0, labelOp(inst.d)); + else + 
build.cbnz(x0, labelOp(inst.d)); + jumpOrFallthrough(blockOp(inst.e), next); + break; + } + case IrCmd::DO_ARITH: + regs.assertAllFree(); + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); + build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); + + if (inst.c.kind == IrOpKind::VmConst) + { + // TODO: refactor into a common helper + if (vmConstOp(inst.c) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate) + { + build.add(x3, rConstants, uint16_t(vmConstOp(inst.c) * sizeof(TValue))); + } + else + { + build.mov(x3, vmConstOp(inst.c) * sizeof(TValue)); + build.add(x3, rConstants, x3); + } + } + else + build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue))); + + build.mov(w4, TMS(intOp(inst.d))); + build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_doarith))); + build.blr(x5); + + emitUpdateBase(build); + break; + case IrCmd::GET_IMPORT: + regs.assertAllFree(); + emitInstGetImport(build, vmRegOp(inst.a), uintOp(inst.b)); + break; + case IrCmd::GET_UPVALUE: + { + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp2 = regs.allocTemp(KindA64::q); + RegisterA64 temp3 = regs.allocTemp(KindA64::w); + + build.add(temp1, rClosure, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b))); + + // uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value + Label skip; + build.ldr(temp3, mem(temp1, offsetof(TValue, tt))); + build.cmp(temp3, LUA_TUPVAL); + build.b(ConditionA64::NotEqual, skip); + + // UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally) + build.ldr(temp1, mem(temp1, offsetof(TValue, value.gc))); + build.ldr(temp1, mem(temp1, offsetof(UpVal, v))); + + build.setLabel(skip); + + build.ldr(temp2, temp1); + build.str(temp2, mem(rBase, vmRegOp(inst.a) * sizeof(TValue))); + break; + } + case IrCmd::CHECK_TAG: + build.cmp(regOp(inst.a), tagOp(inst.b)); + 
build.b(ConditionA64::NotEqual, labelOp(inst.c)); + break; + case IrCmd::CHECK_READONLY: + { + RegisterA64 temp = regs.allocTemp(KindA64::w); + build.ldrb(temp, mem(regOp(inst.a), offsetof(Table, readonly))); + build.cbnz(temp, labelOp(inst.b)); + break; + } + case IrCmd::CHECK_NO_METATABLE: + { + RegisterA64 temp = regs.allocTemp(KindA64::x); + build.ldr(temp, mem(regOp(inst.a), offsetof(Table, metatable))); + build.cbnz(temp, labelOp(inst.b)); + break; + } + case IrCmd::CHECK_SAFE_ENV: + { + RegisterA64 temp = regs.allocTemp(KindA64::x); + RegisterA64 tempw{KindA64::w, temp.index}; + build.ldr(temp, mem(rClosure, offsetof(Closure, env))); + build.ldrb(tempw, mem(temp, offsetof(Table, safeenv))); + build.cbz(tempw, labelOp(inst.a)); + break; + } case IrCmd::INTERRUPT: { - emitInterrupt(build, uintOp(inst.a)); + unsigned int pcpos = uintOp(inst.a); + regs.assertAllFree(); + + Label skip; + build.ldr(x2, mem(rState, offsetof(lua_State, global))); + build.ldr(x2, mem(x2, offsetof(global_State, cb.interrupt))); + build.cbz(x2, skip); + + // Jump to outlined interrupt handler, it will give back control to x1 + build.mov(x0, (pcpos + 1) * sizeof(Instruction)); + build.adr(x1, skip); + build.b(helpers.interrupt); + + build.setLabel(skip); break; } - case IrCmd::LOP_RETURN: + case IrCmd::SET_SAVEDPC: { + unsigned int pcpos = uintOp(inst.a); + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp2 = regs.allocTemp(KindA64::x); + + // TODO: refactor into a common helper + if (pcpos * sizeof(Instruction) <= AssemblyBuilderA64::kMaxImmediate) + { + build.add(temp1, rCode, uint16_t(pcpos * sizeof(Instruction))); + } + else + { + build.mov(temp1, pcpos * sizeof(Instruction)); + build.add(temp1, rCode, temp1); + } + + build.ldr(temp2, mem(rState, offsetof(lua_State, ci))); + build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc))); + break; + } + case IrCmd::CALL: + regs.assertAllFree(); + emitInstCall(build, helpers, vmRegOp(inst.a), intOp(inst.b), intOp(inst.c)); 
+ break; + case IrCmd::RETURN: + regs.assertAllFree(); emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b)); break; - } default: LUAU_ASSERT(!"Not supported yet"); break; } - // TODO - // regs.freeLastUseRegs(inst, index); + regs.freeLastUseRegs(inst, index); + regs.freeTempRegs(); } bool IrLoweringA64::isFallthroughBlock(IrBlock target, IrBlock next) @@ -85,6 +499,83 @@ void IrLoweringA64::jumpOrFallthrough(IrBlock& target, IrBlock& next) build.b(target.label); } +RegisterA64 IrLoweringA64::tempDouble(IrOp op) +{ + if (op.kind == IrOpKind::Inst) + return regOp(op); + else if (op.kind == IrOpKind::Constant) + { + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp2 = regs.allocTemp(KindA64::d); + build.adr(temp1, doubleOp(op)); + build.ldr(temp2, temp1); + return temp2; + } + else + { + LUAU_ASSERT(!"Unsupported instruction form"); + return noreg; + } +} + +RegisterA64 IrLoweringA64::tempInt(IrOp op) +{ + if (op.kind == IrOpKind::Inst) + return regOp(op); + else if (op.kind == IrOpKind::Constant) + { + RegisterA64 temp = regs.allocTemp(KindA64::w); + build.mov(temp, intOp(op)); + return temp; + } + else + { + LUAU_ASSERT(!"Unsupported instruction form"); + return noreg; + } +} + +AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset) +{ + // This is needed to tighten the bounds checks in the VmConst case below + LUAU_ASSERT(offset % 4 == 0); + + if (op.kind == IrOpKind::VmReg) + return mem(rBase, vmRegOp(op) * sizeof(TValue) + offset); + else if (op.kind == IrOpKind::VmConst) + { + size_t constantOffset = vmConstOp(op) * sizeof(TValue) + offset; + + // Note: cumulative offset is guaranteed to be divisible by 4; we can use that to expand the useful range that doesn't require temporaries + if (constantOffset / 4 <= AddressA64::kMaxOffset) + return mem(rConstants, int(constantOffset)); + + RegisterA64 temp = regs.allocTemp(KindA64::x); + + // TODO: refactor into a common helper + if (constantOffset <= AssemblyBuilderA64::kMaxImmediate) + { + 
build.add(temp, rConstants, uint16_t(constantOffset)); + } + else + { + build.mov(temp, int(constantOffset)); + build.add(temp, rConstants, temp); + } + + return temp; + } + // If we have a register, we assume it's a pointer to TValue + // We might introduce explicit operand types in the future to make this more robust + else if (op.kind == IrOpKind::Inst) + return mem(regOp(op), offset); + else + { + LUAU_ASSERT(!"Unsupported instruction form"); + return noreg; + } +} + RegisterA64 IrLoweringA64::regOp(IrOp op) const { IrInst& inst = function.instOp(op); diff --git a/CodeGen/src/IrLoweringA64.h b/CodeGen/src/IrLoweringA64.h index aa9eba42..f638432f 100644 --- a/CodeGen/src/IrLoweringA64.h +++ b/CodeGen/src/IrLoweringA64.h @@ -4,6 +4,8 @@ #include "Luau/AssemblyBuilderA64.h" #include "Luau/IrData.h" +#include "IrRegAllocA64.h" + #include struct Proto; @@ -31,6 +33,11 @@ struct IrLoweringA64 bool isFallthroughBlock(IrBlock target, IrBlock next); void jumpOrFallthrough(IrBlock& target, IrBlock& next); + // Operand data build helpers + RegisterA64 tempDouble(IrOp op); + RegisterA64 tempInt(IrOp op); + AddressA64 tempAddr(IrOp op, int offset); + // Operand data lookup helpers RegisterA64 regOp(IrOp op) const; @@ -51,8 +58,7 @@ struct IrLoweringA64 IrFunction& function; - // TODO: - // IrRegAllocA64 regs; + IrRegAllocA64 regs; }; } // namespace A64 diff --git a/CodeGen/src/IrLoweringX64.cpp b/CodeGen/src/IrLoweringX64.cpp index 1cc56fe3..8c45f36a 100644 --- a/CodeGen/src/IrLoweringX64.cpp +++ b/CodeGen/src/IrLoweringX64.cpp @@ -4,6 +4,7 @@ #include "Luau/CodeGen.h" #include "Luau/DenseHash.h" #include "Luau/IrAnalysis.h" +#include "Luau/IrCallWrapperX64.h" #include "Luau/IrDump.h" #include "Luau/IrUtils.h" @@ -141,7 +142,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) inst.regX64 = regs.allocGprReg(SizeX64::qword); // Custom bit shift value can only be placed in cl - ScopedRegX64 shiftTmp{regs, regs.takeGprReg(rcx)}; + ScopedRegX64 
shiftTmp{regs, regs.takeReg(rcx)}; ScopedRegX64 tmp{regs, SizeX64::qword}; @@ -325,82 +326,11 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::POW_NUM: { - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); - - ScopedRegX64 optLhsTmp{regs}; - RegisterX64 lhs; - - if (inst.a.kind == IrOpKind::Constant) - { - optLhsTmp.alloc(SizeX64::xmmword); - - build.vmovsd(optLhsTmp.reg, memRegDoubleOp(inst.a)); - lhs = optLhsTmp.reg; - } - else - { - lhs = regOp(inst.a); - } - - if (inst.b.kind == IrOpKind::Inst) - { - // TODO: this doesn't happen with current local-only register allocation, but has to be handled in the future - LUAU_ASSERT(regOp(inst.b) != xmm0); - - if (lhs != xmm0) - build.vmovsd(xmm0, lhs, lhs); - - if (regOp(inst.b) != xmm1) - build.vmovsd(xmm1, regOp(inst.b), regOp(inst.b)); - - build.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]); - - if (inst.regX64 != xmm0) - build.vmovsd(inst.regX64, xmm0, xmm0); - } - else if (inst.b.kind == IrOpKind::Constant) - { - double rhs = doubleOp(inst.b); - - if (rhs == 2.0) - { - build.vmulsd(inst.regX64, lhs, lhs); - } - else if (rhs == 0.5) - { - build.vsqrtsd(inst.regX64, lhs, lhs); - } - else if (rhs == 3.0) - { - ScopedRegX64 tmp{regs, SizeX64::xmmword}; - - build.vmulsd(tmp.reg, lhs, lhs); - build.vmulsd(inst.regX64, lhs, tmp.reg); - } - else - { - if (lhs != xmm0) - build.vmovsd(xmm0, xmm0, lhs); - - build.vmovsd(xmm1, build.f64(rhs)); - build.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]); - - if (inst.regX64 != xmm0) - build.vmovsd(inst.regX64, xmm0, xmm0); - } - } - else - { - if (lhs != xmm0) - build.vmovsd(xmm0, lhs, lhs); - - build.vmovsd(xmm1, memRegDoubleOp(inst.b)); - build.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]); - - if (inst.regX64 != xmm0) - build.vmovsd(inst.regX64, xmm0, xmm0); - } - + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::xmmword, 
memRegDoubleOp(inst.a), inst.a); + callWrap.addArgument(SizeX64::xmmword, memRegDoubleOp(inst.b), inst.b); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_pow)]); + inst.regX64 = regs.takeReg(xmm0); break; } case IrCmd::MIN_NUM: @@ -451,6 +381,46 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } + case IrCmd::FLOOR_NUM: + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToNegativeInfinity); + break; + case IrCmd::CEIL_NUM: + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToPositiveInfinity); + break; + case IrCmd::ROUND_NUM: + { + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + ScopedRegX64 tmp1{regs, SizeX64::xmmword}; + ScopedRegX64 tmp2{regs, SizeX64::xmmword}; + + if (inst.a.kind != IrOpKind::Inst || regOp(inst.a) != inst.regX64) + build.vmovsd(inst.regX64, memRegDoubleOp(inst.a)); + + build.vandpd(tmp1.reg, inst.regX64, build.f64x2(-0.0, -0.0)); + build.vmovsd(tmp2.reg, build.i64(0x3fdfffffffffffff)); // 0.49999999999999994 + build.vorpd(tmp1.reg, tmp1.reg, tmp2.reg); + build.vaddsd(inst.regX64, inst.regX64, tmp1.reg); + build.vroundsd(inst.regX64, inst.regX64, inst.regX64, RoundingModeX64::RoundToZero); + break; + } + case IrCmd::SQRT_NUM: + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + build.vsqrtsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a)); + break; + case IrCmd::ABS_NUM: + inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + + if (inst.a.kind != IrOpKind::Inst || regOp(inst.a) != inst.regX64) + build.vmovsd(inst.regX64, memRegDoubleOp(inst.a)); + + build.vandpd(inst.regX64, inst.regX64, build.i64(~(1LL << 63))); + break; case IrCmd::NOT_ANY: { // TODO: if we have a single user which is a STORE_INT, we are missing the opportunity to write directly to target @@ -539,7 
+509,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::JUMP_CMP_ANY: - jumpOnAnyCmpFallback(build, vmRegOp(inst.a), vmRegOp(inst.b), conditionOp(inst.c), labelOp(inst.d)); + jumpOnAnyCmpFallback(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), conditionOp(inst.c), labelOp(inst.d)); jumpOrFallthrough(blockOp(inst.e), next); break; case IrCmd::JUMP_SLOT_MATCH: @@ -551,34 +521,34 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::TABLE_LEN: - inst.regX64 = regs.allocXmmReg(); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]); - build.mov(rArg1, regOp(inst.a)); - build.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]); + inst.regX64 = regs.allocXmmReg(); build.vcvtsi2sd(inst.regX64, inst.regX64, eax); break; + } case IrCmd::NEW_TABLE: - inst.regX64 = regs.allocGprReg(SizeX64::qword); - - build.mov(rArg1, rState); - build.mov(dwordReg(rArg2), uintOp(inst.a)); - build.mov(dwordReg(rArg3), uintOp(inst.b)); - build.call(qword[rNativeContext + offsetof(NativeContext, luaH_new)]); - - if (inst.regX64 != rax) - build.mov(inst.regX64, rax); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.a)), inst.a); + callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b)), inst.b); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_new)]); + inst.regX64 = regs.takeReg(rax); break; + } case IrCmd::DUP_TABLE: - inst.regX64 = regs.allocGprReg(SizeX64::qword); - - // Re-ordered to avoid register conflict - build.mov(rArg2, regOp(inst.a)); - build.mov(rArg1, rState); - build.call(qword[rNativeContext + offsetof(NativeContext, luaH_clone)]); - - if (inst.regX64 != rax) - build.mov(inst.regX64, rax); + { + 
IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_clone)]); + inst.regX64 = regs.takeReg(rax); break; + } case IrCmd::TRY_NUM_TO_INDEX: { inst.regX64 = regs.allocGprReg(SizeX64::dword); @@ -590,12 +560,26 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::TRY_CALL_FASTGETTM: { - inst.regX64 = regs.allocGprReg(SizeX64::qword); + ScopedRegX64 tmp{regs, SizeX64::qword}; - callGetFastTmOrFallback(build, regOp(inst.a), TMS(intOp(inst.b)), labelOp(inst.c)); + build.mov(tmp.reg, qword[regOp(inst.a) + offsetof(Table, metatable)]); + regs.freeLastUseReg(function.instOp(inst.a), index); // Release before the call if it's the last use - if (inst.regX64 != rax) - build.mov(inst.regX64, rax); + build.test(tmp.reg, tmp.reg); + build.jcc(ConditionX64::Zero, labelOp(inst.c)); // No metatable + + build.test(byte[tmp.reg + offsetof(Table, tmcache)], 1 << intOp(inst.b)); + build.jcc(ConditionX64::NotZero, labelOp(inst.c)); // No tag method + + ScopedRegX64 tmp2{regs, SizeX64::qword}; + build.mov(tmp2.reg, qword[rState + offsetof(lua_State, global)]); + + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.addArgument(SizeX64::qword, intOp(inst.b)); + callWrap.addArgument(SizeX64::qword, qword[tmp2.release() + offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*)]); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_gettm)]); + inst.regX64 = regs.takeReg(rax); break; } case IrCmd::INT_TO_NUM: @@ -701,7 +685,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.call(rax); - inst.regX64 = regs.takeGprReg(eax); // Result of a builtin call is returned in eax + inst.regX64 = regs.takeReg(eax); // Result of a builtin call is returned in eax break; } case 
IrCmd::CHECK_FASTCALL_RES: @@ -714,23 +698,23 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::DO_ARITH: if (inst.c.kind == IrOpKind::VmReg) - callArithHelper(build, vmRegOp(inst.a), vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), TMS(intOp(inst.d))); + callArithHelper(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), TMS(intOp(inst.d))); else - callArithHelper(build, vmRegOp(inst.a), vmRegOp(inst.b), luauConstantAddress(vmConstOp(inst.c)), TMS(intOp(inst.d))); + callArithHelper(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), luauConstantAddress(vmConstOp(inst.c)), TMS(intOp(inst.d))); break; case IrCmd::DO_LEN: - callLengthHelper(build, vmRegOp(inst.a), vmRegOp(inst.b)); + callLengthHelper(regs, build, vmRegOp(inst.a), vmRegOp(inst.b)); break; case IrCmd::GET_TABLE: if (inst.c.kind == IrOpKind::VmReg) { - callGetTable(build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a)); + callGetTable(regs, build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a)); } else if (inst.c.kind == IrOpKind::Constant) { TValue n; setnvalue(&n, uintOp(inst.c)); - callGetTable(build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a)); + callGetTable(regs, build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a)); } else { @@ -740,13 +724,13 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) case IrCmd::SET_TABLE: if (inst.c.kind == IrOpKind::VmReg) { - callSetTable(build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a)); + callSetTable(regs, build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a)); } else if (inst.c.kind == IrOpKind::Constant) { TValue n; setnvalue(&n, uintOp(inst.c)); - callSetTable(build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a)); + callSetTable(regs, build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a)); } else { @@ -757,13 +741,16 @@ void 
IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) emitInstGetImportFallback(build, vmRegOp(inst.a), uintOp(inst.b)); break; case IrCmd::CONCAT: - build.mov(rArg1, rState); - build.mov(dwordReg(rArg2), uintOp(inst.b)); - build.mov(dwordReg(rArg3), vmRegOp(inst.a) + uintOp(inst.b) - 1); - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_concat)]); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b))); + callWrap.addArgument(SizeX64::dword, int32_t(vmRegOp(inst.a) + uintOp(inst.b) - 1)); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_concat)]); emitUpdateBase(build); break; + } case IrCmd::GET_UPVALUE: { ScopedRegX64 tmp1{regs, SizeX64::qword}; @@ -793,21 +780,26 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) Label next; ScopedRegX64 tmp1{regs, SizeX64::qword}; ScopedRegX64 tmp2{regs, SizeX64::qword}; - ScopedRegX64 tmp3{regs, SizeX64::xmmword}; build.mov(tmp1.reg, sClosure); build.mov(tmp2.reg, qword[tmp1.reg + offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc)]); build.mov(tmp1.reg, qword[tmp2.reg + offsetof(UpVal, v)]); - build.vmovups(tmp3.reg, luauReg(vmRegOp(inst.b))); - build.vmovups(xmmword[tmp1.reg], tmp3.reg); - callBarrierObject(build, tmp1.reg, tmp2.reg, vmRegOp(inst.b), next); + { + ScopedRegX64 tmp3{regs, SizeX64::xmmword}; + build.vmovups(tmp3.reg, luauReg(vmRegOp(inst.b))); + build.vmovups(xmmword[tmp1.reg], tmp3.reg); + } + + tmp1.free(); + + callBarrierObject(regs, build, tmp2.release(), {}, vmRegOp(inst.b), next); build.setLabel(next); break; } case IrCmd::PREPARE_FORN: - callPrepareForN(build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c)); + callPrepareForN(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c)); break; case IrCmd::CHECK_TAG: if (inst.a.kind == IrOpKind::Inst) @@ -863,38 +855,43 @@ 
void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) jumpIfNodeHasNext(build, regOp(inst.a), labelOp(inst.b)); break; case IrCmd::INTERRUPT: + regs.assertAllFree(); emitInterrupt(build, uintOp(inst.a)); break; case IrCmd::CHECK_GC: { Label skip; - callCheckGc(build, -1, false, skip); + callCheckGc(regs, build, skip); build.setLabel(skip); break; } case IrCmd::BARRIER_OBJ: { Label skip; - ScopedRegX64 tmp{regs, SizeX64::qword}; - - callBarrierObject(build, tmp.reg, regOp(inst.a), vmRegOp(inst.b), skip); + callBarrierObject(regs, build, regOp(inst.a), inst.a, vmRegOp(inst.b), skip); build.setLabel(skip); break; } case IrCmd::BARRIER_TABLE_BACK: { Label skip; - - callBarrierTableFast(build, regOp(inst.a), skip); + callBarrierTableFast(regs, build, regOp(inst.a), inst.a, skip); build.setLabel(skip); break; } case IrCmd::BARRIER_TABLE_FORWARD: { Label skip; - ScopedRegX64 tmp{regs, SizeX64::qword}; - callBarrierTable(build, tmp.reg, regOp(inst.a), vmRegOp(inst.b), skip); + ScopedRegX64 tmp{regs, SizeX64::qword}; + checkObjectBarrierConditions(build, tmp.reg, regOp(inst.a), vmRegOp(inst.b), skip); + + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a); + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barriertable)]); + build.setLabel(skip); break; } @@ -926,11 +923,12 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(UpVal, v)]); build.jcc(ConditionX64::Above, next); - if (rArg2 != tmp2.reg) - build.mov(rArg2, tmp2.reg); + tmp1.free(); - build.mov(rArg1, rState); - build.call(qword[rNativeContext + offsetof(NativeContext, luaF_close)]); + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, tmp2); + 
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaF_close)]); build.setLabel(next); break; @@ -940,42 +938,53 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; // Fallbacks to non-IR instruction implementations - case IrCmd::LOP_SETLIST: + case IrCmd::SETLIST: { Label next; - emitInstSetList(build, next, vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d), uintOp(inst.e)); + regs.assertAllFree(); + emitInstSetList(regs, build, next, vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d), uintOp(inst.e)); build.setLabel(next); break; } - case IrCmd::LOP_CALL: + case IrCmd::CALL: + regs.assertAllFree(); emitInstCall(build, helpers, vmRegOp(inst.a), intOp(inst.b), intOp(inst.c)); break; - case IrCmd::LOP_RETURN: + case IrCmd::RETURN: + regs.assertAllFree(); emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b)); break; - case IrCmd::LOP_FORGLOOP: + case IrCmd::FORGLOOP: + regs.assertAllFree(); emitinstForGLoop(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c), labelOp(inst.d)); break; - case IrCmd::LOP_FORGLOOP_FALLBACK: - emitinstForGLoopFallback(build, uintOp(inst.a), vmRegOp(inst.b), intOp(inst.c), labelOp(inst.d)); - build.jmp(labelOp(inst.e)); + case IrCmd::FORGLOOP_FALLBACK: + regs.assertAllFree(); + emitinstForGLoopFallback(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c)); + build.jmp(labelOp(inst.d)); break; - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: + case IrCmd::FORGPREP_XNEXT_FALLBACK: + regs.assertAllFree(); emitInstForGPrepXnextFallback(build, uintOp(inst.a), vmRegOp(inst.b), labelOp(inst.c)); break; - case IrCmd::LOP_AND: + case IrCmd::AND: + regs.assertAllFree(); emitInstAnd(build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c)); break; - case IrCmd::LOP_ANDK: + case IrCmd::ANDK: + regs.assertAllFree(); emitInstAndK(build, vmRegOp(inst.a), vmRegOp(inst.b), vmConstOp(inst.c)); break; - case IrCmd::LOP_OR: + case IrCmd::OR: + regs.assertAllFree(); emitInstOr(build, vmRegOp(inst.a), 
vmRegOp(inst.b), vmRegOp(inst.c)); break; - case IrCmd::LOP_ORK: + case IrCmd::ORK: + regs.assertAllFree(); emitInstOrK(build, vmRegOp(inst.a), vmRegOp(inst.b), vmConstOp(inst.c)); break; - case IrCmd::LOP_COVERAGE: + case IrCmd::COVERAGE: + regs.assertAllFree(); emitInstCoverage(build, uintOp(inst.a)); break; @@ -984,12 +993,14 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_GETGLOBAL, uintOp(inst.a)); break; case IrCmd::FALLBACK_SETGLOBAL: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_SETGLOBAL, uintOp(inst.a)); break; case IrCmd::FALLBACK_GETTABLEKS: @@ -997,6 +1008,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_GETTABLEKS, uintOp(inst.a)); break; case IrCmd::FALLBACK_SETTABLEKS: @@ -1004,6 +1016,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_SETTABLEKS, uintOp(inst.a)); break; case IrCmd::FALLBACK_NAMECALL: @@ -1011,32 +1024,38 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_NAMECALL, uintOp(inst.a)); break; case IrCmd::FALLBACK_PREPVARARGS: LUAU_ASSERT(inst.b.kind == IrOpKind::Constant); + regs.assertAllFree(); emitFallback(build, data, LOP_PREPVARARGS, uintOp(inst.a)); break; case IrCmd::FALLBACK_GETVARARGS: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); 
LUAU_ASSERT(inst.c.kind == IrOpKind::Constant); + regs.assertAllFree(); emitFallback(build, data, LOP_GETVARARGS, uintOp(inst.a)); break; case IrCmd::FALLBACK_NEWCLOSURE: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::Constant); + regs.assertAllFree(); emitFallback(build, data, LOP_NEWCLOSURE, uintOp(inst.a)); break; case IrCmd::FALLBACK_DUPCLOSURE: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); + regs.assertAllFree(); emitFallback(build, data, LOP_DUPCLOSURE, uintOp(inst.a)); break; case IrCmd::FALLBACK_FORGPREP: + regs.assertAllFree(); emitFallback(build, data, LOP_FORGPREP, uintOp(inst.a)); break; default: diff --git a/CodeGen/src/IrLoweringX64.h b/CodeGen/src/IrLoweringX64.h index c8ebd1f1..ecaa6a1d 100644 --- a/CodeGen/src/IrLoweringX64.h +++ b/CodeGen/src/IrLoweringX64.h @@ -3,8 +3,7 @@ #include "Luau/AssemblyBuilderX64.h" #include "Luau/IrData.h" - -#include "IrRegAllocX64.h" +#include "Luau/IrRegAllocX64.h" #include diff --git a/CodeGen/src/IrRegAllocA64.cpp b/CodeGen/src/IrRegAllocA64.cpp new file mode 100644 index 00000000..dc18ab56 --- /dev/null +++ b/CodeGen/src/IrRegAllocA64.cpp @@ -0,0 +1,174 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#include "IrRegAllocA64.h" + +#ifdef _MSC_VER +#include +#endif + +namespace Luau +{ +namespace CodeGen +{ +namespace A64 +{ + +inline int setBit(uint32_t n) +{ + LUAU_ASSERT(n); + +#ifdef _MSC_VER + unsigned long rl; + _BitScanReverse(&rl, n); + return int(rl); +#else + return 31 - __builtin_clz(n); +#endif +} + +IrRegAllocA64::IrRegAllocA64(IrFunction& function, std::initializer_list> regs) + : function(function) +{ + for (auto& p : regs) + { + LUAU_ASSERT(p.first.kind == p.second.kind && p.first.index <= p.second.index); + + Set& set = getSet(p.first.kind); + + for (int i = p.first.index; i <= p.second.index; ++i) + set.base |= 1u << i; + } + + gpr.free = 
gpr.base; + simd.free = simd.base; +} + +RegisterA64 IrRegAllocA64::allocReg(KindA64 kind) +{ + Set& set = getSet(kind); + + if (set.free == 0) + { + LUAU_ASSERT(!"Out of registers to allocate"); + return noreg; + } + + int index = setBit(set.free); + set.free &= ~(1u << index); + + return RegisterA64{kind, uint8_t(index)}; +} + +RegisterA64 IrRegAllocA64::allocTemp(KindA64 kind) +{ + Set& set = getSet(kind); + + if (set.free == 0) + { + LUAU_ASSERT(!"Out of registers to allocate"); + return noreg; + } + + int index = setBit(set.free); + + set.free &= ~(1u << index); + set.temp |= 1u << index; + + return RegisterA64{kind, uint8_t(index)}; +} + +RegisterA64 IrRegAllocA64::allocReuse(KindA64 kind, uint32_t index, std::initializer_list oprefs) +{ + for (IrOp op : oprefs) + { + if (op.kind != IrOpKind::Inst) + continue; + + IrInst& source = function.instructions[op.index]; + + if (source.lastUse == index && !source.reusedReg) + { + LUAU_ASSERT(source.regA64.kind == kind); + + source.reusedReg = true; + return source.regA64; + } + } + + return allocReg(kind); +} + +void IrRegAllocA64::freeReg(RegisterA64 reg) +{ + Set& set = getSet(reg.kind); + + LUAU_ASSERT((set.base & (1u << reg.index)) != 0); + LUAU_ASSERT((set.free & (1u << reg.index)) == 0); + set.free |= 1u << reg.index; +} + +void IrRegAllocA64::freeLastUseReg(IrInst& target, uint32_t index) +{ + if (target.lastUse == index && !target.reusedReg) + { + // Register might have already been freed if it had multiple uses inside a single instruction + if (target.regA64 == noreg) + return; + + freeReg(target.regA64); + target.regA64 = noreg; + } +} + +void IrRegAllocA64::freeLastUseRegs(const IrInst& inst, uint32_t index) +{ + auto checkOp = [this, index](IrOp op) { + if (op.kind == IrOpKind::Inst) + freeLastUseReg(function.instructions[op.index], index); + }; + + checkOp(inst.a); + checkOp(inst.b); + checkOp(inst.c); + checkOp(inst.d); + checkOp(inst.e); + checkOp(inst.f); +} + +void IrRegAllocA64::freeTempRegs() +{ + 
LUAU_ASSERT((gpr.free & gpr.temp) == 0); + gpr.free |= gpr.temp; + gpr.temp = 0; + + LUAU_ASSERT((simd.free & simd.temp) == 0); + simd.free |= simd.temp; + simd.temp = 0; +} + +void IrRegAllocA64::assertAllFree() const +{ + LUAU_ASSERT(gpr.free == gpr.base); + LUAU_ASSERT(simd.free == simd.base); +} + +IrRegAllocA64::Set& IrRegAllocA64::getSet(KindA64 kind) +{ + switch (kind) + { + case KindA64::x: + case KindA64::w: + return gpr; + + case KindA64::d: + case KindA64::q: + return simd; + + default: + LUAU_ASSERT(!"Unexpected register kind"); + LUAU_UNREACHABLE(); + } +} + +} // namespace A64 +} // namespace CodeGen +} // namespace Luau diff --git a/CodeGen/src/IrRegAllocA64.h b/CodeGen/src/IrRegAllocA64.h new file mode 100644 index 00000000..2ed0787a --- /dev/null +++ b/CodeGen/src/IrRegAllocA64.h @@ -0,0 +1,55 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#pragma once + +#include "Luau/IrData.h" +#include "Luau/RegisterA64.h" + +#include +#include + +namespace Luau +{ +namespace CodeGen +{ +namespace A64 +{ + +struct IrRegAllocA64 +{ + IrRegAllocA64(IrFunction& function, std::initializer_list> regs); + + RegisterA64 allocReg(KindA64 kind); + RegisterA64 allocTemp(KindA64 kind); + RegisterA64 allocReuse(KindA64 kind, uint32_t index, std::initializer_list oprefs); + + void freeReg(RegisterA64 reg); + + void freeLastUseReg(IrInst& target, uint32_t index); + void freeLastUseRegs(const IrInst& inst, uint32_t index); + + void freeTempRegs(); + + void assertAllFree() const; + + IrFunction& function; + + struct Set + { + // which registers are in the set that the allocator manages (initialized at construction) + uint32_t base = 0; + + // which subset of initial set is free + uint32_t free = 0; + + // which subset of initial set is allocated as temporary + uint32_t temp = 0; + }; + + Set gpr, simd; + + Set& getSet(KindA64 kind); +}; + +} // namespace A64 +} // namespace CodeGen +} // namespace Luau 
diff --git a/CodeGen/src/IrRegAllocX64.cpp b/CodeGen/src/IrRegAllocX64.cpp index c527d033..eeb6cfe6 100644 --- a/CodeGen/src/IrRegAllocX64.cpp +++ b/CodeGen/src/IrRegAllocX64.cpp @@ -1,19 +1,5 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details -#include "IrRegAllocX64.h" - -#include "Luau/CodeGen.h" -#include "Luau/DenseHash.h" -#include "Luau/IrAnalysis.h" -#include "Luau/IrDump.h" -#include "Luau/IrUtils.h" - -#include "EmitCommonX64.h" -#include "EmitInstructionX64.h" -#include "NativeState.h" - -#include "lstate.h" - -#include +#include "Luau/IrRegAllocX64.h" namespace Luau { @@ -108,13 +94,21 @@ RegisterX64 IrRegAllocX64::allocXmmRegOrReuse(uint32_t index, std::initializer_l return allocXmmReg(); } -RegisterX64 IrRegAllocX64::takeGprReg(RegisterX64 reg) +RegisterX64 IrRegAllocX64::takeReg(RegisterX64 reg) { // In a more advanced register allocator, this would require a spill for the current register user // But at the current stage we don't have register live ranges intersecting forced register uses - LUAU_ASSERT(freeGprMap[reg.index]); + if (reg.size == SizeX64::xmmword) + { + LUAU_ASSERT(freeXmmMap[reg.index]); + freeXmmMap[reg.index] = false; + } + else + { + LUAU_ASSERT(freeGprMap[reg.index]); + freeGprMap[reg.index] = false; + } - freeGprMap[reg.index] = false; return reg; } @@ -134,7 +128,7 @@ void IrRegAllocX64::freeReg(RegisterX64 reg) void IrRegAllocX64::freeLastUseReg(IrInst& target, uint32_t index) { - if (target.lastUse == index && !target.reusedReg) + if (isLastUseReg(target, index)) { // Register might have already been freed if it had multiple uses inside a single instruction if (target.regX64 == noreg) @@ -160,6 +154,35 @@ void IrRegAllocX64::freeLastUseRegs(const IrInst& inst, uint32_t index) checkOp(inst.f); } +bool IrRegAllocX64::isLastUseReg(const IrInst& target, uint32_t index) const +{ + return target.lastUse == index && !target.reusedReg; +} + +bool 
IrRegAllocX64::shouldFreeGpr(RegisterX64 reg) const +{ + if (reg == noreg) + return false; + + LUAU_ASSERT(reg.size != SizeX64::xmmword); + + for (RegisterX64 gpr : kGprAllocOrder) + { + if (reg.index == gpr.index) + return true; + } + + return false; +} + +void IrRegAllocX64::assertFree(RegisterX64 reg) const +{ + if (reg.size == SizeX64::xmmword) + LUAU_ASSERT(freeXmmMap[reg.index]); + else + LUAU_ASSERT(freeGprMap[reg.index]); +} + void IrRegAllocX64::assertAllFree() const { for (RegisterX64 reg : kGprAllocOrder) @@ -211,6 +234,13 @@ void ScopedRegX64::free() reg = noreg; } +RegisterX64 ScopedRegX64::release() +{ + RegisterX64 tmp = reg; + reg = noreg; + return tmp; +} + } // namespace X64 } // namespace CodeGen } // namespace Luau diff --git a/CodeGen/src/IrTranslateBuiltins.cpp b/CodeGen/src/IrTranslateBuiltins.cpp index cb8e4148..2955aaff 100644 --- a/CodeGen/src/IrTranslateBuiltins.cpp +++ b/CodeGen/src/IrTranslateBuiltins.cpp @@ -6,7 +6,6 @@ #include "lstate.h" -// TODO: should be possible to handle fastcalls in contexts where nresults is -1 by adding the adjustment instruction // TODO: when nresults is less than our actual result count, we can skip computing/writing unused results namespace Luau @@ -26,8 +25,8 @@ BuiltinImplResult translateBuiltinNumberToNumber( build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 1}; } @@ -43,8 +42,8 @@ BuiltinImplResult translateBuiltin2NumberToNumber( build.loadAndCheckTag(args, LUA_TNUMBER, fallback); build.inst(IrCmd::FASTCALL, 
build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO:tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 1}; } @@ -59,8 +58,9 @@ BuiltinImplResult translateBuiltinNumberTo2Number( build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: some tag updates might not be required, we place them here now because FASTCALL is not modeled in constant propagation yet - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + build.inst(IrCmd::STORE_TAG, build.vmReg(ra + 1), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 2}; @@ -131,8 +131,8 @@ BuiltinImplResult translateBuiltinMathLog( build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 1}; } @@ -210,6 +210,44 @@ BuiltinImplResult translateBuiltinMathClamp(IrBuilder& build, int nparams, int r return {BuiltinImplType::UsesFallback, 1}; } +BuiltinImplResult translateBuiltinMathUnary(IrBuilder& build, IrCmd cmd, int nparams, int ra, int arg, int nresults, IrOp fallback) 
+{ + if (nparams < 1 || nresults > 1) + return {BuiltinImplType::None, -1}; + + build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); + + IrOp varg = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(arg)); + IrOp result = build.inst(cmd, varg); + + build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result); + + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + + return {BuiltinImplType::UsesFallback, 1}; +} + +BuiltinImplResult translateBuiltinMathBinary(IrBuilder& build, IrCmd cmd, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback) +{ + if (nparams < 2 || nresults > 1) + return {BuiltinImplType::None, -1}; + + build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); + build.loadAndCheckTag(args, LUA_TNUMBER, fallback); + + IrOp lhs = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(arg)); + IrOp rhs = build.inst(IrCmd::LOAD_DOUBLE, args); + IrOp result = build.inst(cmd, lhs, rhs); + + build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result); + + if (ra != arg) + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); + + return {BuiltinImplType::UsesFallback, 1}; +} + BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback) { if (nparams < 1 || nresults > 1) @@ -218,7 +256,6 @@ BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, in build.inst( IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING)); return {BuiltinImplType::UsesFallback, 1}; @@ -232,7 +269,6 @@ BuiltinImplResult translateBuiltinTypeof(IrBuilder& build, int nparams, int ra, build.inst( IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), 
build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); - // TODO: tag update might not be required, we place it here now because FASTCALL is not modeled in constant propagation yet build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING)); return {BuiltinImplType::UsesFallback, 1}; @@ -261,9 +297,17 @@ BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, case LBF_MATH_CLAMP: return translateBuiltinMathClamp(build, nparams, ra, arg, args, nresults, fallback); case LBF_MATH_FLOOR: + return translateBuiltinMathUnary(build, IrCmd::FLOOR_NUM, nparams, ra, arg, nresults, fallback); case LBF_MATH_CEIL: + return translateBuiltinMathUnary(build, IrCmd::CEIL_NUM, nparams, ra, arg, nresults, fallback); case LBF_MATH_SQRT: + return translateBuiltinMathUnary(build, IrCmd::SQRT_NUM, nparams, ra, arg, nresults, fallback); case LBF_MATH_ABS: + return translateBuiltinMathUnary(build, IrCmd::ABS_NUM, nparams, ra, arg, nresults, fallback); + case LBF_MATH_ROUND: + return translateBuiltinMathUnary(build, IrCmd::ROUND_NUM, nparams, ra, arg, nresults, fallback); + case LBF_MATH_POW: + return translateBuiltinMathBinary(build, IrCmd::POW_NUM, nparams, ra, arg, args, nresults, fallback); case LBF_MATH_EXP: case LBF_MATH_ASIN: case LBF_MATH_SIN: @@ -275,11 +319,9 @@ BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, case LBF_MATH_TAN: case LBF_MATH_TANH: case LBF_MATH_LOG10: - case LBF_MATH_ROUND: case LBF_MATH_SIGN: return translateBuiltinNumberToNumber(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback); case LBF_MATH_FMOD: - case LBF_MATH_POW: case LBF_MATH_ATAN2: case LBF_MATH_LDEXP: return translateBuiltin2NumberToNumber(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback); diff --git a/CodeGen/src/IrTranslation.cpp b/CodeGen/src/IrTranslation.cpp index d90841ce..e366888e 100644 --- a/CodeGen/src/IrTranslation.cpp +++ 
b/CodeGen/src/IrTranslation.cpp @@ -296,46 +296,60 @@ static void translateInstBinaryNumeric(IrBuilder& build, int ra, int rb, int rc, IrOp vb = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(rb)); IrOp vc; + IrOp result; + if (opc.kind == IrOpKind::VmConst) { LUAU_ASSERT(build.function.proto); TValue protok = build.function.proto->k[opc.index]; LUAU_ASSERT(protok.tt == LUA_TNUMBER); - vc = build.constDouble(protok.value.n); + + // VM has special cases for exponentiation with constants + if (tm == TM_POW && protok.value.n == 0.5) + result = build.inst(IrCmd::SQRT_NUM, vb); + else if (tm == TM_POW && protok.value.n == 2.0) + result = build.inst(IrCmd::MUL_NUM, vb, vb); + else if (tm == TM_POW && protok.value.n == 3.0) + result = build.inst(IrCmd::MUL_NUM, vb, build.inst(IrCmd::MUL_NUM, vb, vb)); + else + vc = build.constDouble(protok.value.n); } else { vc = build.inst(IrCmd::LOAD_DOUBLE, opc); } - IrOp va; - - switch (tm) + if (result.kind == IrOpKind::None) { - case TM_ADD: - va = build.inst(IrCmd::ADD_NUM, vb, vc); - break; - case TM_SUB: - va = build.inst(IrCmd::SUB_NUM, vb, vc); - break; - case TM_MUL: - va = build.inst(IrCmd::MUL_NUM, vb, vc); - break; - case TM_DIV: - va = build.inst(IrCmd::DIV_NUM, vb, vc); - break; - case TM_MOD: - va = build.inst(IrCmd::MOD_NUM, vb, vc); - break; - case TM_POW: - va = build.inst(IrCmd::POW_NUM, vb, vc); - break; - default: - LUAU_ASSERT(!"unsupported binary op"); + LUAU_ASSERT(vc.kind != IrOpKind::None); + + switch (tm) + { + case TM_ADD: + result = build.inst(IrCmd::ADD_NUM, vb, vc); + break; + case TM_SUB: + result = build.inst(IrCmd::SUB_NUM, vb, vc); + break; + case TM_MUL: + result = build.inst(IrCmd::MUL_NUM, vb, vc); + break; + case TM_DIV: + result = build.inst(IrCmd::DIV_NUM, vb, vc); + break; + case TM_MOD: + result = build.inst(IrCmd::MOD_NUM, vb, vc); + break; + case TM_POW: + result = build.inst(IrCmd::POW_NUM, vb, vc); + break; + default: + LUAU_ASSERT(!"unsupported binary op"); + } } - 
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), va); + build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result); if (ra != rb && ra != rc) // TODO: optimization should handle second check, but we'll test this later build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); @@ -638,7 +652,7 @@ void translateInstForGPrepNext(IrBuilder& build, const Instruction* pc, int pcpo build.inst(IrCmd::JUMP, target); build.beginBlock(fallback); - build.inst(IrCmd::LOP_FORGPREP_XNEXT_FALLBACK, build.constUint(pcpos), build.vmReg(ra), target); + build.inst(IrCmd::FORGPREP_XNEXT_FALLBACK, build.constUint(pcpos), build.vmReg(ra), target); } void translateInstForGPrepInext(IrBuilder& build, const Instruction* pc, int pcpos) @@ -670,7 +684,7 @@ void translateInstForGPrepInext(IrBuilder& build, const Instruction* pc, int pcp build.inst(IrCmd::JUMP, target); build.beginBlock(fallback); - build.inst(IrCmd::LOP_FORGPREP_XNEXT_FALLBACK, build.constUint(pcpos), build.vmReg(ra), target); + build.inst(IrCmd::FORGPREP_XNEXT_FALLBACK, build.constUint(pcpos), build.vmReg(ra), target); } void translateInstForGLoopIpairs(IrBuilder& build, const Instruction* pc, int pcpos) @@ -721,7 +735,8 @@ void translateInstForGLoopIpairs(IrBuilder& build, const Instruction* pc, int pc build.inst(IrCmd::JUMP, loopRepeat); build.beginBlock(fallback); - build.inst(IrCmd::LOP_FORGLOOP_FALLBACK, build.constUint(pcpos), build.vmReg(ra), build.constInt(int(pc[1])), loopRepeat, loopExit); + build.inst(IrCmd::SET_SAVEDPC, build.constUint(pcpos + 1)); + build.inst(IrCmd::FORGLOOP_FALLBACK, build.vmReg(ra), build.constInt(int(pc[1])), loopRepeat, loopExit); // Fallthrough in original bytecode is implicit, so we start next internal block here if (build.isInternalBlock(loopExit)) diff --git a/CodeGen/src/IrUtils.cpp b/CodeGen/src/IrUtils.cpp index b28ce596..45e2bae0 100644 --- a/CodeGen/src/IrUtils.cpp +++ b/CodeGen/src/IrUtils.cpp @@ -320,6 +320,26 @@ void foldConstants(IrBuilder& build, IrFunction& 
function, IrBlock& block, uint3 if (inst.a.kind == IrOpKind::Constant) substitute(function, inst, build.constDouble(-function.doubleOp(inst.a))); break; + case IrCmd::FLOOR_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(floor(function.doubleOp(inst.a)))); + break; + case IrCmd::CEIL_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(ceil(function.doubleOp(inst.a)))); + break; + case IrCmd::ROUND_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(round(function.doubleOp(inst.a)))); + break; + case IrCmd::SQRT_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(sqrt(function.doubleOp(inst.a)))); + break; + case IrCmd::ABS_NUM: + if (inst.a.kind == IrOpKind::Constant) + substitute(function, inst, build.constDouble(fabs(function.doubleOp(inst.a)))); + break; case IrCmd::NOT_ANY: if (inst.a.kind == IrOpKind::Constant) { diff --git a/CodeGen/src/NativeState.cpp b/CodeGen/src/NativeState.cpp index f1497890..ddc9c03d 100644 --- a/CodeGen/src/NativeState.cpp +++ b/CodeGen/src/NativeState.cpp @@ -109,6 +109,8 @@ void initHelperFunctions(NativeState& data) data.context.forgPrepXnextFallback = forgPrepXnextFallback; data.context.callProlog = callProlog; data.context.callEpilogC = callEpilogC; + + data.context.callFallback = callFallback; data.context.returnFallback = returnFallback; } diff --git a/CodeGen/src/NativeState.h b/CodeGen/src/NativeState.h index 6d833189..2d97e63c 100644 --- a/CodeGen/src/NativeState.h +++ b/CodeGen/src/NativeState.h @@ -101,7 +101,9 @@ struct NativeContext void (*forgPrepXnextFallback)(lua_State* L, TValue* ra, int pc) = nullptr; Closure* (*callProlog)(lua_State* L, TValue* ra, StkId argtop, int nresults) = nullptr; void (*callEpilogC)(lua_State* L, int nresults, int n) = nullptr; - const Instruction* (*returnFallback)(lua_State* L, StkId ra, int n) = nullptr; + + Closure* 
(*callFallback)(lua_State* L, StkId ra, StkId argtop, int nresults) = nullptr; + Closure* (*returnFallback)(lua_State* L, StkId ra, int n) = nullptr; // Opcode fallbacks, implemented in C NativeFallback fallback[LOP__COUNT] = {}; diff --git a/CodeGen/src/OptimizeConstProp.cpp b/CodeGen/src/OptimizeConstProp.cpp index 67236476..f767f549 100644 --- a/CodeGen/src/OptimizeConstProp.cpp +++ b/CodeGen/src/OptimizeConstProp.cpp @@ -503,10 +503,10 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& } } break; - case IrCmd::LOP_AND: - case IrCmd::LOP_ANDK: - case IrCmd::LOP_OR: - case IrCmd::LOP_ORK: + case IrCmd::AND: + case IrCmd::ANDK: + case IrCmd::OR: + case IrCmd::ORK: state.invalidate(inst.a); break; case IrCmd::FASTCALL: @@ -533,6 +533,11 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& case IrCmd::MIN_NUM: case IrCmd::MAX_NUM: case IrCmd::UNM_NUM: + case IrCmd::FLOOR_NUM: + case IrCmd::CEIL_NUM: + case IrCmd::ROUND_NUM: + case IrCmd::SQRT_NUM: + case IrCmd::ABS_NUM: case IrCmd::NOT_ANY: case IrCmd::JUMP: case IrCmd::JUMP_EQ_POINTER: @@ -547,10 +552,10 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& case IrCmd::CHECK_SLOT_MATCH: case IrCmd::CHECK_NODE_NO_NEXT: case IrCmd::BARRIER_TABLE_BACK: - case IrCmd::LOP_RETURN: - case IrCmd::LOP_COVERAGE: + case IrCmd::RETURN: + case IrCmd::COVERAGE: case IrCmd::SET_UPVALUE: - case IrCmd::LOP_SETLIST: // We don't track table state that this can invalidate + case IrCmd::SETLIST: // We don't track table state that this can invalidate case IrCmd::SET_SAVEDPC: // TODO: we may be able to remove some updates to PC case IrCmd::CLOSE_UPVALS: // Doesn't change memory that we track case IrCmd::CAPTURE: @@ -599,18 +604,18 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& case IrCmd::INTERRUPT: state.invalidateUserCall(); break; - case IrCmd::LOP_CALL: + case IrCmd::CALL: 
state.invalidateRegistersFrom(inst.a.index); state.invalidateUserCall(); break; - case IrCmd::LOP_FORGLOOP: + case IrCmd::FORGLOOP: state.invalidateRegistersFrom(inst.a.index + 2); // Rn and Rn+1 are not modified break; - case IrCmd::LOP_FORGLOOP_FALLBACK: - state.invalidateRegistersFrom(inst.b.index + 2); // Rn and Rn+1 are not modified + case IrCmd::FORGLOOP_FALLBACK: + state.invalidateRegistersFrom(inst.a.index + 2); // Rn and Rn+1 are not modified state.invalidateUserCall(); break; - case IrCmd::LOP_FORGPREP_XNEXT_FALLBACK: + case IrCmd::FORGPREP_XNEXT_FALLBACK: // This fallback only conditionally throws an exception break; case IrCmd::FALLBACK_GETGLOBAL: diff --git a/Compiler/src/Compiler.cpp b/Compiler/src/Compiler.cpp index 03f4b3e6..9478404a 100644 --- a/Compiler/src/Compiler.cpp +++ b/Compiler/src/Compiler.cpp @@ -25,8 +25,6 @@ LUAU_FASTINTVARIABLE(LuauCompileInlineThreshold, 25) LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300) LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5) -LUAU_FASTFLAGVARIABLE(LuauCompileBuiltinArity, false) - namespace Luau { @@ -295,7 +293,7 @@ struct Compiler // handles builtin calls that can't be constant-folded but are known to return one value // note: optimizationLevel check is technically redundant but it's important that we never optimize based on builtins in O1 - if (FFlag::LuauCompileBuiltinArity && options.optimizationLevel >= 2) + if (options.optimizationLevel >= 2) if (int* bfid = builtins.find(expr)) return getBuiltinInfo(*bfid).results != 1; @@ -766,7 +764,7 @@ struct Compiler { if (!isExprMultRet(expr->args.data[expr->args.size - 1])) return compileExprFastcallN(expr, target, targetCount, targetTop, multRet, regs, bfid); - else if (FFlag::LuauCompileBuiltinArity && options.optimizationLevel >= 2 && int(expr->args.size) == getBuiltinInfo(bfid).params) + else if (options.optimizationLevel >= 2 && int(expr->args.size) == getBuiltinInfo(bfid).params) return compileExprFastcallN(expr, target, targetCount, 
targetTop, multRet, regs, bfid); } diff --git a/Sources.cmake b/Sources.cmake index 3f32aab8..3508ec39 100644 --- a/Sources.cmake +++ b/Sources.cmake @@ -65,8 +65,10 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/include/Luau/ConditionX64.h CodeGen/include/Luau/IrAnalysis.h CodeGen/include/Luau/IrBuilder.h + CodeGen/include/Luau/IrCallWrapperX64.h CodeGen/include/Luau/IrDump.h CodeGen/include/Luau/IrData.h + CodeGen/include/Luau/IrRegAllocX64.h CodeGen/include/Luau/IrUtils.h CodeGen/include/Luau/Label.h CodeGen/include/Luau/OperandX64.h @@ -94,9 +96,11 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/src/Fallbacks.cpp CodeGen/src/IrAnalysis.cpp CodeGen/src/IrBuilder.cpp + CodeGen/src/IrCallWrapperX64.cpp CodeGen/src/IrDump.cpp CodeGen/src/IrLoweringA64.cpp CodeGen/src/IrLoweringX64.cpp + CodeGen/src/IrRegAllocA64.cpp CodeGen/src/IrRegAllocX64.cpp CodeGen/src/IrTranslateBuiltins.cpp CodeGen/src/IrTranslation.cpp @@ -122,7 +126,7 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/src/FallbacksProlog.h CodeGen/src/IrLoweringA64.h CodeGen/src/IrLoweringX64.h - CodeGen/src/IrRegAllocX64.h + CodeGen/src/IrRegAllocA64.h CodeGen/src/IrTranslateBuiltins.h CodeGen/src/IrTranslation.h CodeGen/src/NativeState.h @@ -342,6 +346,7 @@ if(TARGET Luau.UnitTest) tests/Fixture.h tests/IostreamOptional.h tests/ScopedFlags.h + tests/AssemblyBuilderA64.test.cpp tests/AssemblyBuilderX64.test.cpp tests/AstJsonEncoder.test.cpp tests/AstQuery.test.cpp @@ -358,6 +363,7 @@ if(TARGET Luau.UnitTest) tests/Error.test.cpp tests/Frontend.test.cpp tests/IrBuilder.test.cpp + tests/IrCallWrapperX64.test.cpp tests/JsonEmitter.test.cpp tests/Lexer.test.cpp tests/Linter.test.cpp diff --git a/VM/src/lbuiltins.cpp b/VM/src/lbuiltins.cpp index 3c669bff..e0dc8a38 100644 --- a/VM/src/lbuiltins.cpp +++ b/VM/src/lbuiltins.cpp @@ -23,8 +23,6 @@ #endif #endif -LUAU_FASTFLAGVARIABLE(LuauBuiltinSSE41, false) - // luauF functions implement FASTCALL instruction that performs a direct execution of some builtin functions 
from the VM // The rule of thumb is that FASTCALL functions can not call user code, yield, fail, or reallocate stack. // If types of the arguments mismatch, luauF_* needs to return -1 and the execution will fall back to the usual call path @@ -105,9 +103,7 @@ static int luauF_atan(lua_State* L, StkId res, TValue* arg0, int nresults, StkId return -1; } -// TODO: LUAU_NOINLINE can be removed with LuauBuiltinSSE41 LUAU_FASTMATH_BEGIN -LUAU_NOINLINE static int luauF_ceil(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) @@ -170,9 +166,7 @@ static int luauF_exp(lua_State* L, StkId res, TValue* arg0, int nresults, StkId return -1; } -// TODO: LUAU_NOINLINE can be removed with LuauBuiltinSSE41 LUAU_FASTMATH_BEGIN -LUAU_NOINLINE static int luauF_floor(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) @@ -949,9 +943,7 @@ static int luauF_sign(lua_State* L, StkId res, TValue* arg0, int nresults, StkId return -1; } -// TODO: LUAU_NOINLINE can be removed with LuauBuiltinSSE41 LUAU_FASTMATH_BEGIN -LUAU_NOINLINE static int luauF_round(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) @@ -1271,9 +1263,6 @@ LUAU_TARGET_SSE41 inline double roundsd_sse41(double v) LUAU_TARGET_SSE41 static int luauF_floor_sse41(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { - if (!FFlag::LuauBuiltinSSE41) - return luauF_floor(L, res, arg0, nresults, args, nparams); - if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) { double a1 = nvalue(arg0); @@ -1286,9 +1275,6 @@ LUAU_TARGET_SSE41 static int luauF_floor_sse41(lua_State* L, StkId res, TValue* LUAU_TARGET_SSE41 static int luauF_ceil_sse41(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { - if (!FFlag::LuauBuiltinSSE41) - return 
luauF_ceil(L, res, arg0, nresults, args, nparams); - if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) { double a1 = nvalue(arg0); @@ -1301,9 +1287,6 @@ LUAU_TARGET_SSE41 static int luauF_ceil_sse41(lua_State* L, StkId res, TValue* a LUAU_TARGET_SSE41 static int luauF_round_sse41(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { - if (!FFlag::LuauBuiltinSSE41) - return luauF_round(L, res, arg0, nresults, args, nparams); - if (nparams >= 1 && nresults <= 1 && ttisnumber(arg0)) { double a1 = nvalue(arg0); diff --git a/fuzz/format.cpp b/fuzz/format.cpp index 3ad3912f..4b943bf1 100644 --- a/fuzz/format.cpp +++ b/fuzz/format.cpp @@ -1,6 +1,7 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details #include "Luau/Common.h" +#include #include #include diff --git a/fuzz/linter.cpp b/fuzz/linter.cpp index 66ca5bb1..854c6327 100644 --- a/fuzz/linter.cpp +++ b/fuzz/linter.cpp @@ -3,10 +3,10 @@ #include "Luau/BuiltinDefinitions.h" #include "Luau/Common.h" +#include "Luau/Frontend.h" #include "Luau/Linter.h" #include "Luau/ModuleResolver.h" #include "Luau/Parser.h" -#include "Luau/TypeInfer.h" extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) { @@ -18,18 +18,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) Luau::ParseResult parseResult = Luau::Parser::parse(reinterpret_cast(Data), Size, names, allocator, options); // "static" here is to accelerate fuzzing process by only creating and populating the type environment once - static Luau::NullModuleResolver moduleResolver; - static Luau::InternalErrorReporter iceHandler; - static Luau::TypeChecker sharedEnv(&moduleResolver, &iceHandler); - static int once = (Luau::registerBuiltinGlobals(sharedEnv), 1); + static Luau::NullFileResolver fileResolver; + static Luau::NullConfigResolver configResolver; + static Luau::Frontend frontend{&fileResolver, &configResolver}; + static int once = 
(Luau::registerBuiltinGlobals(frontend), 1); (void)once; - static int once2 = (Luau::freeze(sharedEnv.globalTypes), 1); + static int once2 = (Luau::freeze(frontend.globals.globalTypes), 1); (void)once2; if (parseResult.errors.empty()) { - Luau::TypeChecker typeck(&moduleResolver, &iceHandler); - typeck.globalScope = sharedEnv.globalScope; + Luau::TypeChecker typeck(frontend.globals.globalScope, &frontend.moduleResolver, frontend.builtinTypes, &frontend.iceHandler); Luau::LintOptions lintOptions; lintOptions.warningMask = ~0ull; diff --git a/fuzz/proto.cpp b/fuzz/proto.cpp index c94f0889..ffeb4919 100644 --- a/fuzz/proto.cpp +++ b/fuzz/proto.cpp @@ -261,8 +261,8 @@ DEFINE_PROTO_FUZZER(const luau::ModuleSet& message) { static FuzzFileResolver fileResolver; static FuzzConfigResolver configResolver; - static Luau::FrontendOptions options{true, true}; - static Luau::Frontend frontend(&fileResolver, &configResolver, options); + static Luau::FrontendOptions defaultOptions{/*retainFullTypeGraphs*/ true, /*forAutocomplete*/ false, /*runLintChecks*/ kFuzzLinter}; + static Luau::Frontend frontend(&fileResolver, &configResolver, defaultOptions); static int once = (setupFrontend(frontend), 0); (void)once; @@ -285,16 +285,12 @@ DEFINE_PROTO_FUZZER(const luau::ModuleSet& message) try { - Luau::CheckResult result = frontend.check(name, std::nullopt); - - // lint (note that we need access to types so we need to do this with typeck in scope) - if (kFuzzLinter && result.errors.empty()) - frontend.lint(name, std::nullopt); + frontend.check(name); // Second pass in strict mode (forced by auto-complete) - Luau::FrontendOptions opts; - opts.forAutocomplete = true; - frontend.check(name, opts); + Luau::FrontendOptions options = defaultOptions; + options.forAutocomplete = true; + frontend.check(name, options); } catch (std::exception&) { diff --git a/fuzz/typeck.cpp b/fuzz/typeck.cpp index a6c9ae28..4f8f8857 100644 --- a/fuzz/typeck.cpp +++ b/fuzz/typeck.cpp @@ -3,9 +3,9 @@ #include 
"Luau/BuiltinDefinitions.h" #include "Luau/Common.h" +#include "Luau/Frontend.h" #include "Luau/ModuleResolver.h" #include "Luau/Parser.h" -#include "Luau/TypeInfer.h" LUAU_FASTINT(LuauTypeInferRecursionLimit) LUAU_FASTINT(LuauTypeInferTypePackLoopLimit) @@ -23,23 +23,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) Luau::ParseResult parseResult = Luau::Parser::parse(reinterpret_cast(Data), Size, names, allocator, options); // "static" here is to accelerate fuzzing process by only creating and populating the type environment once - static Luau::NullModuleResolver moduleResolver; - static Luau::InternalErrorReporter iceHandler; - static Luau::TypeChecker sharedEnv(&moduleResolver, &iceHandler); - static int once = (Luau::registerBuiltinGlobals(sharedEnv), 1); + static Luau::NullFileResolver fileResolver; + static Luau::NullConfigResolver configResolver; + static Luau::Frontend frontend{&fileResolver, &configResolver}; + static int once = (Luau::registerBuiltinGlobals(frontend), 1); (void)once; - static int once2 = (Luau::freeze(sharedEnv.globalTypes), 1); + static int once2 = (Luau::freeze(frontend.globals.globalTypes), 1); (void)once2; if (parseResult.errors.empty()) { + Luau::TypeChecker typeck(frontend.globals.globalScope, &frontend.moduleResolver, frontend.builtinTypes, &frontend.iceHandler); + Luau::SourceModule module; module.root = parseResult.root; module.mode = Luau::Mode::Nonstrict; - Luau::TypeChecker typeck(&moduleResolver, &iceHandler); - typeck.globalScope = sharedEnv.globalScope; - try { typeck.check(module, Luau::Mode::Nonstrict); diff --git a/tests/AssemblyBuilderA64.test.cpp b/tests/AssemblyBuilderA64.test.cpp index a68932ba..1690c748 100644 --- a/tests/AssemblyBuilderA64.test.cpp +++ b/tests/AssemblyBuilderA64.test.cpp @@ -32,9 +32,9 @@ static std::string bytecodeAsArray(const std::vector& code) class AssemblyBuilderA64Fixture { public: - bool check(void (*f)(AssemblyBuilderA64& build), std::vector code, std::vector 
data = {}) + bool check(void (*f)(AssemblyBuilderA64& build), std::vector code, std::vector data = {}, unsigned int features = 0) { - AssemblyBuilderA64 build(/* logText= */ false); + AssemblyBuilderA64 build(/* logText= */ false, features); f(build); @@ -285,6 +285,87 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "AddressOfLabel") // clang-format on } +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPBasic") +{ + SINGLE_COMPARE(fmov(d0, d1), 0x1E604020); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPMath") +{ + SINGLE_COMPARE(fabs(d1, d2), 0x1E60C041); + SINGLE_COMPARE(fadd(d1, d2, d3), 0x1E632841); + SINGLE_COMPARE(fdiv(d1, d2, d3), 0x1E631841); + SINGLE_COMPARE(fmul(d1, d2, d3), 0x1E630841); + SINGLE_COMPARE(fneg(d1, d2), 0x1E614041); + SINGLE_COMPARE(fsqrt(d1, d2), 0x1E61C041); + SINGLE_COMPARE(fsub(d1, d2, d3), 0x1E633841); + + SINGLE_COMPARE(frinta(d1, d2), 0x1E664041); + SINGLE_COMPARE(frintm(d1, d2), 0x1E654041); + SINGLE_COMPARE(frintp(d1, d2), 0x1E64C041); + + SINGLE_COMPARE(fcvtzs(w1, d2), 0x1E780041); + SINGLE_COMPARE(fcvtzs(x1, d2), 0x9E780041); + SINGLE_COMPARE(fcvtzu(w1, d2), 0x1E790041); + SINGLE_COMPARE(fcvtzu(x1, d2), 0x9E790041); + + SINGLE_COMPARE(scvtf(d1, w2), 0x1E620041); + SINGLE_COMPARE(scvtf(d1, x2), 0x9E620041); + SINGLE_COMPARE(ucvtf(d1, w2), 0x1E630041); + SINGLE_COMPARE(ucvtf(d1, x2), 0x9E630041); + + CHECK(check( + [](AssemblyBuilderA64& build) { + build.fjcvtzs(w1, d2); + }, + {0x1E7E0041}, {}, A64::Feature_JSCVT)); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPLoadStore") +{ + // address forms + SINGLE_COMPARE(ldr(d0, x1), 0xFD400020); + SINGLE_COMPARE(ldr(d0, mem(x1, 8)), 0xFD400420); + SINGLE_COMPARE(ldr(d0, mem(x1, x7)), 0xFC676820); + SINGLE_COMPARE(ldr(d0, mem(x1, -7)), 0xFC5F9020); + SINGLE_COMPARE(str(d0, x1), 0xFD000020); + SINGLE_COMPARE(str(d0, mem(x1, 8)), 0xFD000420); + SINGLE_COMPARE(str(d0, mem(x1, x7)), 0xFC276820); + SINGLE_COMPARE(str(d0, mem(x1, -7)), 0xFC1F9020); + + // load/store sizes + 
SINGLE_COMPARE(ldr(d0, x1), 0xFD400020); + SINGLE_COMPARE(ldr(q0, x1), 0x3DC00020); + SINGLE_COMPARE(str(d0, x1), 0xFD000020); + SINGLE_COMPARE(str(q0, x1), 0x3D800020); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPCompare") +{ + SINGLE_COMPARE(fcmp(d0, d1), 0x1E612000); + SINGLE_COMPARE(fcmpz(d1), 0x1E602028); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "AddressOffsetSize") +{ + SINGLE_COMPARE(ldr(w0, mem(x1, 16)), 0xB9401020); + SINGLE_COMPARE(ldr(x0, mem(x1, 16)), 0xF9400820); + SINGLE_COMPARE(ldr(d0, mem(x1, 16)), 0xFD400820); + SINGLE_COMPARE(ldr(q0, mem(x1, 16)), 0x3DC00420); + + SINGLE_COMPARE(str(w0, mem(x1, 16)), 0xB9001020); + SINGLE_COMPARE(str(x0, mem(x1, 16)), 0xF9000820); + SINGLE_COMPARE(str(d0, mem(x1, 16)), 0xFD000820); + SINGLE_COMPARE(str(q0, mem(x1, 16)), 0x3D800420); +} + +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "ConditionalSelect") +{ + SINGLE_COMPARE(csel(x0, x1, x2, ConditionA64::Equal), 0x9A820020); + SINGLE_COMPARE(csel(w0, w1, w2, ConditionA64::Equal), 0x1A820020); + SINGLE_COMPARE(fcsel(d0, d1, d2, ConditionA64::Equal), 0x1E620C20); +} + TEST_CASE("LogTest") { AssemblyBuilderA64 build(/* logText= */ true); @@ -309,6 +390,14 @@ TEST_CASE("LogTest") build.ldp(x0, x1, mem(x8, 8)); build.adr(x0, l); + build.fabs(d1, d2); + build.ldr(q1, x2); + + build.csel(x0, x1, x2, ConditionA64::Equal); + + build.fcmp(d0, d1); + build.fcmpz(d0); + build.setLabel(l); build.ret(); @@ -331,6 +420,11 @@ TEST_CASE("LogTest") cbz x7,.L1 ldp x0,x1,[x8,#8] adr x0,.L1 + fabs d1,d2 + ldr q1,[x2] + csel x0,x1,x2,eq + fcmp d0,d1 + fcmp d0,#0 .L1: ret )"; diff --git a/tests/Autocomplete.test.cpp b/tests/Autocomplete.test.cpp index aedb50ab..53dc99e1 100644 --- a/tests/Autocomplete.test.cpp +++ b/tests/Autocomplete.test.cpp @@ -2995,8 +2995,6 @@ TEST_CASE_FIXTURE(ACFixture, "autocomplete_string_singletons") TEST_CASE_FIXTURE(ACFixture, "string_singleton_as_table_key") { - ScopedFastFlag sff{"LuauCompleteTableKeysBetter", true}; - check(R"( type 
Direction = "up" | "down" diff --git a/tests/Compiler.test.cpp b/tests/Compiler.test.cpp index c9d0c01d..cabf1cce 100644 --- a/tests/Compiler.test.cpp +++ b/tests/Compiler.test.cpp @@ -4691,8 +4691,6 @@ RETURN R0 0 TEST_CASE("LoopUnrollCost") { - ScopedFastFlag sff("LuauCompileBuiltinArity", true); - ScopedFastInt sfis[] = { {"LuauCompileLoopUnrollThreshold", 25}, {"LuauCompileLoopUnrollThresholdMaxBoost", 300}, @@ -5962,8 +5960,6 @@ RETURN R2 1 TEST_CASE("InlineMultret") { - ScopedFastFlag sff("LuauCompileBuiltinArity", true); - // inlining a function in multret context is prohibited since we can't adjust L->top outside of CALL/GETVARARGS CHECK_EQ("\n" + compileFunction(R"( local function foo(a) @@ -6301,8 +6297,6 @@ RETURN R0 52 TEST_CASE("BuiltinFoldingProhibited") { - ScopedFastFlag sff("LuauCompileBuiltinArity", true); - CHECK_EQ("\n" + compileFunction(R"( return math.abs(), @@ -6905,8 +6899,6 @@ L3: RETURN R0 0 TEST_CASE("BuiltinArity") { - ScopedFastFlag sff("LuauCompileBuiltinArity", true); - // by default we can't assume that we know parameter/result count for builtins as they can be overridden at runtime CHECK_EQ("\n" + compileFunction(R"( return math.abs(unknown()) diff --git a/tests/Conformance.test.cpp b/tests/Conformance.test.cpp index 1072b95d..957d3271 100644 --- a/tests/Conformance.test.cpp +++ b/tests/Conformance.test.cpp @@ -504,7 +504,7 @@ TEST_CASE("Types") Luau::InternalErrorReporter iceHandler; Luau::BuiltinTypes builtinTypes; Luau::GlobalTypes globals{Luau::NotNull{&builtinTypes}}; - Luau::TypeChecker env(globals, &moduleResolver, Luau::NotNull{&builtinTypes}, &iceHandler); + Luau::TypeChecker env(globals.globalScope, &moduleResolver, Luau::NotNull{&builtinTypes}, &iceHandler); Luau::registerBuiltinGlobals(env, globals); Luau::freeze(globals.globalTypes); diff --git a/tests/ConstraintGraphBuilderFixture.cpp b/tests/ConstraintGraphBuilderFixture.cpp index cc239b7e..d34b86bd 100644 --- a/tests/ConstraintGraphBuilderFixture.cpp +++ 
b/tests/ConstraintGraphBuilderFixture.cpp @@ -31,8 +31,7 @@ void ConstraintGraphBuilderFixture::generateConstraints(const std::string& code) void ConstraintGraphBuilderFixture::solve(const std::string& code) { generateConstraints(code); - ConstraintSolver cs{NotNull{&normalizer}, NotNull{rootScope}, constraints, "MainModule", NotNull{mainModule->reduction.get()}, - NotNull(&moduleResolver), {}, &logger}; + ConstraintSolver cs{NotNull{&normalizer}, NotNull{rootScope}, constraints, "MainModule", NotNull(&moduleResolver), {}, &logger}; cs.run(); } diff --git a/tests/IrBuilder.test.cpp b/tests/IrBuilder.test.cpp index f4c9cdca..c1392c9d 100644 --- a/tests/IrBuilder.test.cpp +++ b/tests/IrBuilder.test.cpp @@ -42,7 +42,7 @@ public: f(a); build.beginBlock(a); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); }; template @@ -56,10 +56,10 @@ public: f(a, b); build.beginBlock(a); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(b); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); }; void checkEq(IrOp instOp, const IrInst& inst) @@ -94,10 +94,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptCheckTag") build.inst(IrCmd::CHECK_TAG, tag1, build.constTag(0), fallback); IrOp tag2 = build.inst(IrCmd::LOAD_TAG, build.vmConst(5)); build.inst(IrCmd::CHECK_TAG, tag2, build.constTag(0), fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -107,10 +107,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptCheckTag") bb_0: CHECK_TAG R2, tnil, bb_fallback_1 CHECK_TAG K5, tnil, bb_fallback_1 - LOP_RETURN 0u + RETURN 0u bb_fallback_1: - LOP_RETURN 1u 
+ RETURN 1u )"); } @@ -123,7 +123,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptBinaryArith") IrOp opA = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(1)); IrOp opB = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(2)); build.inst(IrCmd::ADD_NUM, opA, opB); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -133,7 +133,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptBinaryArith") bb_0: %0 = LOAD_DOUBLE R1 %2 = ADD_NUM %0, R2 - LOP_RETURN 0u + RETURN 0u )"); } @@ -150,10 +150,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptEqTag1") build.inst(IrCmd::JUMP_EQ_TAG, opA, opB, trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -165,10 +165,10 @@ bb_0: JUMP_EQ_TAG R1, %1, bb_1, bb_2 bb_1: - LOP_RETURN 0u + RETURN 0u bb_2: - LOP_RETURN 0u + RETURN 0u )"); } @@ -186,10 +186,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptEqTag2") build.inst(IrCmd::JUMP_EQ_TAG, opA, opB, trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -203,10 +203,10 @@ bb_0: JUMP_EQ_TAG R2, %0, bb_1, bb_2 bb_1: - LOP_RETURN 0u + RETURN 0u bb_2: - LOP_RETURN 0u + RETURN 0u )"); } @@ -224,10 +224,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptEqTag3") build.inst(IrCmd::JUMP_EQ_TAG, opA, build.constTag(0), trueBlock, falseBlock); 
build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -241,10 +241,10 @@ bb_0: JUMP_EQ_TAG %2, tnil, bb_1, bb_2 bb_1: - LOP_RETURN 0u + RETURN 0u bb_2: - LOP_RETURN 0u + RETURN 0u )"); } @@ -261,10 +261,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FinalX64OptJumpCmpNum") build.inst(IrCmd::JUMP_CMP_NUM, opA, opB, trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); optimizeMemoryOperandsX64(build.function); @@ -276,10 +276,10 @@ bb_0: JUMP_CMP_NUM R1, %1, bb_1, bb_2 bb_1: - LOP_RETURN 0u + RETURN 0u bb_2: - LOP_RETURN 0u + RETURN 0u )"); } @@ -317,7 +317,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "Numeric") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(0), build.inst(IrCmd::INT_TO_NUM, build.constInt(8))); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constantFold(); @@ -342,7 +342,7 @@ bb_0: STORE_INT R0, 1i STORE_INT R0, 0i STORE_DOUBLE R0, 8 - LOP_RETURN 0u + RETURN 0u )"); } @@ -373,25 +373,25 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN 1u + RETURN 1u bb_3: JUMP bb_5 bb_5: - LOP_RETURN 2u + RETURN 2u bb_6: JUMP bb_7 bb_7: - LOP_RETURN 1u + RETURN 1u bb_9: JUMP bb_11 bb_11: - LOP_RETURN 2u + RETURN 2u )"); } @@ -400,18 +400,18 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "NumToIndex") { withOneBlock([this](IrOp a) { build.inst(IrCmd::STORE_INT, build.vmReg(0), build.inst(IrCmd::TRY_NUM_TO_INDEX, build.constDouble(4), a)); - 
build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); withOneBlock([this](IrOp a) { build.inst(IrCmd::STORE_INT, build.vmReg(0), build.inst(IrCmd::TRY_NUM_TO_INDEX, build.constDouble(1.2), a)); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); withOneBlock([this](IrOp a) { IrOp nan = build.inst(IrCmd::DIV_NUM, build.constDouble(0.0), build.constDouble(0.0)); build.inst(IrCmd::STORE_INT, build.vmReg(0), build.inst(IrCmd::TRY_NUM_TO_INDEX, nan, a)); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); updateUseCounts(build.function); @@ -420,19 +420,19 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "NumToIndex") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: STORE_INT R0, 4i - LOP_RETURN 0u + RETURN 0u bb_2: JUMP bb_3 bb_3: - LOP_RETURN 1u + RETURN 1u bb_4: JUMP bb_5 bb_5: - LOP_RETURN 1u + RETURN 1u )"); } @@ -441,12 +441,12 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "Guards") { withOneBlock([this](IrOp a) { build.inst(IrCmd::CHECK_TAG, build.constTag(tnumber), build.constTag(tnumber), a); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); withOneBlock([this](IrOp a) { build.inst(IrCmd::CHECK_TAG, build.constTag(tnil), build.constTag(tnumber), a); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); }); updateUseCounts(build.function); @@ -454,13 +454,13 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "Guards") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: - LOP_RETURN 0u + RETURN 0u bb_2: JUMP bb_3 bb_3: - LOP_RETURN 1u + RETURN 1u )"); } @@ -568,7 +568,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RememberTagsAndValues") build.inst(IrCmd::STORE_INT, build.vmReg(10), build.inst(IrCmd::LOAD_INT, build.vmReg(1))); build.inst(IrCmd::STORE_DOUBLE, build.vmReg(11), 
build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(2))); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -593,7 +593,7 @@ bb_0: STORE_INT R10, %20 %22 = LOAD_DOUBLE R2 STORE_DOUBLE R11, %22 - LOP_RETURN 0u + RETURN 0u )"); } @@ -614,7 +614,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "PropagateThroughTvalue") build.inst(IrCmd::STORE_TAG, build.vmReg(3), build.inst(IrCmd::LOAD_TAG, build.vmReg(1))); build.inst(IrCmd::STORE_DOUBLE, build.vmReg(3), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(1))); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -627,7 +627,7 @@ bb_0: STORE_TVALUE R1, %2 STORE_TAG R3, tnumber STORE_DOUBLE R3, 0.5 - LOP_RETURN 0u + RETURN 0u )"); } @@ -641,10 +641,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipCheckTag") build.inst(IrCmd::STORE_TAG, build.vmReg(0), build.constTag(tnumber)); build.inst(IrCmd::CHECK_TAG, build.inst(IrCmd::LOAD_TAG, build.vmReg(0)), build.constTag(tnumber), fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -652,7 +652,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipCheckTag") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: STORE_TAG R0, tnumber - LOP_RETURN 0u + RETURN 0u )"); } @@ -671,7 +671,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipOncePerBlockChecks") build.inst(IrCmd::DO_LEN, build.vmReg(1), build.vmReg(2)); // Can make env unsafe build.inst(IrCmd::CHECK_SAFE_ENV); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); 
constPropInBlockChains(build); @@ -682,7 +682,7 @@ bb_0: CHECK_GC DO_LEN R1, R2 CHECK_SAFE_ENV - LOP_RETURN 0u + RETURN 0u )"); } @@ -707,10 +707,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RememberTableState") build.inst(IrCmd::CHECK_NO_METATABLE, table, fallback); build.inst(IrCmd::CHECK_READONLY, table, fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -723,10 +723,10 @@ bb_0: DO_LEN R1, R2 CHECK_NO_METATABLE %0, bb_fallback_1 CHECK_READONLY %0, bb_fallback_1 - LOP_RETURN 0u + RETURN 0u bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -742,7 +742,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipUselessBarriers") build.inst(IrCmd::BARRIER_TABLE_FORWARD, table, build.vmReg(0)); IrOp something = build.inst(IrCmd::LOAD_POINTER, build.vmReg(2)); build.inst(IrCmd::BARRIER_OBJ, something, build.vmReg(0)); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -750,7 +750,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SkipUselessBarriers") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: STORE_TAG R0, tnumber - LOP_RETURN 0u + RETURN 0u )"); } @@ -773,7 +773,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "ConcatInvalidation") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(6), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(2))); build.inst(IrCmd::STORE_DOUBLE, build.vmReg(7), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(3))); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -792,7 +792,7 @@ bb_0: %9 = LOAD_DOUBLE R2 STORE_DOUBLE R6, %9 STORE_DOUBLE R7, 2 - LOP_RETURN 0u + 
RETURN 0u )"); } @@ -819,10 +819,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "BuiltinFastcallsMayInvalidateMemory") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(1), build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(0))); // At least R0 wasn't touched - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -837,10 +837,10 @@ bb_0: CHECK_NO_METATABLE %1, bb_fallback_1 CHECK_READONLY %1, bb_fallback_1 STORE_DOUBLE R1, 0.5 - LOP_RETURN 0u + RETURN 0u bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -855,7 +855,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RedundantStoreCheckConstantType") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(0), build.constDouble(0.5)); build.inst(IrCmd::STORE_INT, build.vmReg(0), build.constInt(10)); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -865,7 +865,7 @@ bb_0: STORE_INT R0, 10i STORE_DOUBLE R0, 0.5 STORE_INT R0, 10i - LOP_RETURN 0u + RETURN 0u )"); } @@ -882,10 +882,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TagCheckPropagation") build.inst(IrCmd::CHECK_TAG, unknown, build.constTag(tnumber), fallback); build.inst(IrCmd::CHECK_TAG, unknown, build.constTag(tnumber), fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -894,10 +894,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TagCheckPropagation") bb_0: %0 = LOAD_TAG R0 CHECK_TAG %0, tnumber, bb_fallback_1 - LOP_RETURN 0u + RETURN 0u bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ 
-914,10 +914,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TagCheckPropagationConflicting") build.inst(IrCmd::CHECK_TAG, unknown, build.constTag(tnumber), fallback); build.inst(IrCmd::CHECK_TAG, unknown, build.constTag(tnil), fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(0)); + build.inst(IrCmd::RETURN, build.constUint(0)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -929,7 +929,7 @@ bb_0: JUMP bb_fallback_1 bb_fallback_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -947,13 +947,13 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TruthyTestRemoval") build.inst(IrCmd::JUMP_IF_TRUTHY, build.vmReg(1), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(3)); + build.inst(IrCmd::RETURN, build.constUint(3)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -965,10 +965,10 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN 1u + RETURN 1u bb_fallback_3: - LOP_RETURN 3u + RETURN 3u )"); } @@ -986,13 +986,13 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FalsyTestRemoval") build.inst(IrCmd::JUMP_IF_FALSY, build.vmReg(1), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); build.beginBlock(fallback); - build.inst(IrCmd::LOP_RETURN, build.constUint(3)); + build.inst(IrCmd::RETURN, build.constUint(3)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1004,10 +1004,10 @@ bb_0: JUMP 
bb_2 bb_2: - LOP_RETURN 2u + RETURN 2u bb_fallback_3: - LOP_RETURN 3u + RETURN 3u )"); } @@ -1024,10 +1024,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "TagEqRemoval") build.inst(IrCmd::JUMP_EQ_TAG, tag, build.constTag(tnumber), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1039,7 +1039,7 @@ bb_0: JUMP bb_2 bb_2: - LOP_RETURN 2u + RETURN 2u )"); } @@ -1056,10 +1056,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "IntEqRemoval") build.inst(IrCmd::JUMP_EQ_INT, value, build.constInt(5), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1070,7 +1070,7 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN 1u + RETURN 1u )"); } @@ -1087,10 +1087,10 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "NumCmpRemoval") build.inst(IrCmd::JUMP_CMP_NUM, value, build.constDouble(8.0), build.cond(IrCondition::Greater), trueBlock, falseBlock); build.beginBlock(trueBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(falseBlock); - build.inst(IrCmd::LOP_RETURN, build.constUint(2)); + build.inst(IrCmd::RETURN, build.constUint(2)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1101,7 +1101,7 @@ bb_0: JUMP bb_2 bb_2: - LOP_RETURN 2u + RETURN 2u )"); } @@ -1118,7 +1118,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "DataFlowsThroughDirectJumpToUniqueSuccessor build.beginBlock(block2); build.inst(IrCmd::STORE_TAG, build.vmReg(1), 
build.inst(IrCmd::LOAD_TAG, build.vmReg(0))); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1130,7 +1130,7 @@ bb_0: bb_1: STORE_TAG R1, tnumber - LOP_RETURN 1u + RETURN 1u )"); } @@ -1148,7 +1148,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "DataDoesNotFlowThroughDirectJumpToNonUnique build.beginBlock(block2); build.inst(IrCmd::STORE_TAG, build.vmReg(1), build.inst(IrCmd::LOAD_TAG, build.vmReg(0))); - build.inst(IrCmd::LOP_RETURN, build.constUint(1)); + build.inst(IrCmd::RETURN, build.constUint(1)); build.beginBlock(block3); build.inst(IrCmd::JUMP, block2); @@ -1164,7 +1164,7 @@ bb_0: bb_1: %2 = LOAD_TAG R0 STORE_TAG R1, %2 - LOP_RETURN 1u + RETURN 1u bb_2: JUMP bb_1 @@ -1183,7 +1183,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "EntryBlockUseRemoval") build.inst(IrCmd::JUMP_IF_TRUTHY, build.vmReg(0), exit, repeat); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(repeat); build.inst(IrCmd::INTERRUPT, build.constUint(0)); @@ -1198,7 +1198,7 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1211,14 +1211,14 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RecursiveSccUseRemoval1") IrOp repeat = build.block(IrBlockKind::Internal); build.beginBlock(entry); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(block); build.inst(IrCmd::STORE_TAG, build.vmReg(0), build.constTag(tnumber)); build.inst(IrCmd::JUMP_IF_TRUTHY, build.vmReg(0), exit, repeat); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(repeat); build.inst(IrCmd::INTERRUPT, build.constUint(0)); @@ -1229,14 +1229,14 @@ 
TEST_CASE_FIXTURE(IrBuilderFixture, "RecursiveSccUseRemoval1") CHECK("\n" + toString(build.function, /* includeUseInfo */ false) == R"( bb_0: - LOP_RETURN R0, 0i + RETURN R0, 0i bb_1: STORE_TAG R0, tnumber JUMP bb_2 bb_2: - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1253,14 +1253,14 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "RecursiveSccUseRemoval2") build.inst(IrCmd::JUMP_EQ_INT, build.constInt(0), build.constInt(1), block, exit1); build.beginBlock(exit1); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(block); build.inst(IrCmd::STORE_TAG, build.vmReg(0), build.constTag(tnumber)); build.inst(IrCmd::JUMP_IF_TRUTHY, build.vmReg(0), exit2, repeat); build.beginBlock(exit2); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(repeat); build.inst(IrCmd::INTERRUPT, build.constUint(0)); @@ -1274,14 +1274,14 @@ bb_0: JUMP bb_1 bb_1: - LOP_RETURN R0, 0i + RETURN R0, 0i bb_2: STORE_TAG R0, tnumber JUMP bb_3 bb_3: - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1322,7 +1322,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SimplePathExtraction") build.inst(IrCmd::JUMP, block4); build.beginBlock(block4); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1350,10 +1350,10 @@ bb_4: JUMP bb_5 bb_5: - LOP_RETURN R0, 0i + RETURN R0, 0i bb_linear_6: - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1393,11 +1393,11 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "NoPathExtractionForBlocksWithLiveOutValues" build.beginBlock(block4a); build.inst(IrCmd::STORE_TAG, build.vmReg(0), tag3a); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); build.beginBlock(block4b); 
build.inst(IrCmd::STORE_TAG, build.vmReg(0), tag3a); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(0)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(0)); updateUseCounts(build.function); constPropInBlockChains(build); @@ -1427,11 +1427,11 @@ bb_4: bb_5: STORE_TAG R0, %10 - LOP_RETURN R0, 0i + RETURN R0, 0i bb_6: STORE_TAG R0, %10 - LOP_RETURN R0, 0i + RETURN R0, 0i )"); } @@ -1488,7 +1488,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SimpleDiamond") build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(2), build.constInt(2)); + build.inst(IrCmd::RETURN, build.vmReg(2), build.constInt(2)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1522,7 +1522,7 @@ bb_2: bb_3: ; predecessors: bb_1, bb_2 ; in regs: R2, R3 - LOP_RETURN R2, 2i + RETURN R2, 2i )"); } @@ -1534,11 +1534,11 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "ImplicitFixedRegistersInVarargCall") build.beginBlock(entry); build.inst(IrCmd::FALLBACK_GETVARARGS, build.constUint(0), build.vmReg(3), build.constInt(-1)); - build.inst(IrCmd::LOP_CALL, build.vmReg(0), build.constInt(-1), build.constInt(5)); + build.inst(IrCmd::CALL, build.vmReg(0), build.constInt(-1), build.constInt(5)); build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(5)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(5)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1549,13 +1549,13 @@ bb_0: ; in regs: R0, R1, R2 ; out regs: R0, R1, R2, R3, R4 FALLBACK_GETVARARGS 0u, R3, -1i - LOP_CALL R0, -1i, 5i + CALL R0, -1i, 5i JUMP bb_1 bb_1: ; predecessors: bb_0 ; in regs: R0, R1, R2, R3, R4 - LOP_RETURN R0, 5i + RETURN R0, 5i )"); } @@ -1573,7 +1573,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "ExplicitUseOfRegisterInVarargSequence") build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(-1)); + 
build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(-1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1590,7 +1590,7 @@ bb_0: bb_1: ; predecessors: bb_0 ; in regs: R0... - LOP_RETURN R0, -1i + RETURN R0, -1i )"); } @@ -1601,12 +1601,12 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "VariadicSequenceRestart") IrOp exit = build.block(IrBlockKind::Internal); build.beginBlock(entry); - build.inst(IrCmd::LOP_CALL, build.vmReg(1), build.constInt(0), build.constInt(-1)); - build.inst(IrCmd::LOP_CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); + build.inst(IrCmd::CALL, build.vmReg(1), build.constInt(0), build.constInt(-1)); + build.inst(IrCmd::CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(-1)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(-1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1616,14 +1616,14 @@ bb_0: ; successors: bb_1 ; in regs: R0, R1 ; out regs: R0... - LOP_CALL R1, 0i, -1i - LOP_CALL R0, -1i, -1i + CALL R1, 0i, -1i + CALL R0, -1i, -1i JUMP bb_1 bb_1: ; predecessors: bb_0 ; in regs: R0... 
- LOP_RETURN R0, -1i + RETURN R0, -1i )"); } @@ -1637,15 +1637,15 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "FallbackDoesNotFlowUp") build.beginBlock(entry); build.inst(IrCmd::FALLBACK_GETVARARGS, build.constUint(0), build.vmReg(1), build.constInt(-1)); build.inst(IrCmd::CHECK_TAG, build.inst(IrCmd::LOAD_TAG, build.vmReg(0)), build.constTag(tnumber), fallback); - build.inst(IrCmd::LOP_CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); + build.inst(IrCmd::CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); build.inst(IrCmd::JUMP, exit); build.beginBlock(fallback); - build.inst(IrCmd::LOP_CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); + build.inst(IrCmd::CALL, build.vmReg(0), build.constInt(-1), build.constInt(-1)); build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(-1)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(-1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1658,7 +1658,7 @@ bb_0: FALLBACK_GETVARARGS 0u, R1, -1i %1 = LOAD_TAG R0 CHECK_TAG %1, tnumber, bb_fallback_1 - LOP_CALL R0, -1i, -1i + CALL R0, -1i, -1i JUMP bb_2 bb_fallback_1: @@ -1666,13 +1666,13 @@ bb_fallback_1: ; successors: bb_2 ; in regs: R0, R1... ; out regs: R0... - LOP_CALL R0, -1i, -1i + CALL R0, -1i, -1i JUMP bb_2 bb_2: ; predecessors: bb_0, bb_fallback_1 ; in regs: R0... - LOP_RETURN R0, -1i + RETURN R0, -1i )"); } @@ -1697,7 +1697,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "VariadicSequencePeeling") build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(2), build.constInt(-1)); + build.inst(IrCmd::RETURN, build.vmReg(2), build.constInt(-1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1732,7 +1732,7 @@ bb_2: bb_3: ; predecessors: bb_1, bb_2 ; in regs: R2... 
- LOP_RETURN R2, -1i + RETURN R2, -1i )"); } @@ -1746,11 +1746,11 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "BuiltinVariadicStart") build.inst(IrCmd::STORE_DOUBLE, build.vmReg(1), build.constDouble(1.0)); build.inst(IrCmd::STORE_DOUBLE, build.vmReg(2), build.constDouble(2.0)); build.inst(IrCmd::ADJUST_STACK_TO_REG, build.vmReg(2), build.constInt(1)); - build.inst(IrCmd::LOP_CALL, build.vmReg(1), build.constInt(-1), build.constInt(1)); + build.inst(IrCmd::CALL, build.vmReg(1), build.constInt(-1), build.constInt(1)); build.inst(IrCmd::JUMP, exit); build.beginBlock(exit); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(2)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(2)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1763,13 +1763,13 @@ bb_0: STORE_DOUBLE R1, 1 STORE_DOUBLE R2, 2 ADJUST_STACK_TO_REG R2, 1i - LOP_CALL R1, -1i, 1i + CALL R1, -1i, 1i JUMP bb_1 bb_1: ; predecessors: bb_0 ; in regs: R0, R1 - LOP_RETURN R0, 2i + RETURN R0, 2i )"); } @@ -1781,7 +1781,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SetTable") build.beginBlock(entry); build.inst(IrCmd::SET_TABLE, build.vmReg(0), build.vmReg(1), build.constUint(1)); - build.inst(IrCmd::LOP_RETURN, build.vmReg(0), build.constInt(1)); + build.inst(IrCmd::RETURN, build.vmReg(0), build.constInt(1)); updateUseCounts(build.function); computeCfgInfo(build.function); @@ -1790,7 +1790,7 @@ TEST_CASE_FIXTURE(IrBuilderFixture, "SetTable") bb_0: ; in regs: R0, R1 SET_TABLE R0, R1, 1u - LOP_RETURN R0, 1i + RETURN R0, 1i )"); } diff --git a/tests/IrCallWrapperX64.test.cpp b/tests/IrCallWrapperX64.test.cpp new file mode 100644 index 00000000..8c7b1393 --- /dev/null +++ b/tests/IrCallWrapperX64.test.cpp @@ -0,0 +1,484 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#include "Luau/IrCallWrapperX64.h" +#include "Luau/IrRegAllocX64.h" + +#include "doctest.h" + +using namespace Luau::CodeGen; +using namespace 
Luau::CodeGen::X64; + +class IrCallWrapperX64Fixture +{ +public: + IrCallWrapperX64Fixture() + : build(/* logText */ true, ABIX64::Windows) + , regs(function) + , callWrap(regs, build, ~0u) + { + } + + void checkMatch(std::string expected) + { + regs.assertAllFree(); + + build.finalize(); + + CHECK("\n" + build.text == expected); + } + + AssemblyBuilderX64 build; + IrFunction function; + IrRegAllocX64 regs; + IrCallWrapperX64 callWrap; + + // Tests rely on these to force interference between registers + static constexpr RegisterX64 rArg1 = rcx; + static constexpr RegisterX64 rArg1d = ecx; + static constexpr RegisterX64 rArg2 = rdx; + static constexpr RegisterX64 rArg2d = edx; + static constexpr RegisterX64 rArg3 = r8; + static constexpr RegisterX64 rArg3d = r8d; + static constexpr RegisterX64 rArg4 = r9; + static constexpr RegisterX64 rArg4d = r9d; +}; + +TEST_SUITE_BEGIN("IrCallWrapperX64"); + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "SimpleRegs") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rax)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, tmp1); + callWrap.addArgument(SizeX64::qword, tmp2); // Already in its place + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rcx,rax + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "TrickyUse1") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, tmp1.reg); // Already in its place + callWrap.addArgument(SizeX64::qword, tmp1.release()); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rdx,rcx + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "TrickyUse2") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg]); + callWrap.addArgument(SizeX64::qword, tmp1.release()); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rdx,rcx + mov rcx,qword ptr [rcx] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, 
"SimpleMemImm") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rax)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rsi)}; + callWrap.addArgument(SizeX64::dword, 32); + callWrap.addArgument(SizeX64::dword, -1); + callWrap.addArgument(SizeX64::qword, qword[r14 + 32]); + callWrap.addArgument(SizeX64::qword, qword[tmp1.release() + tmp2.release()]); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov r8,qword ptr [r14+020h] + mov r9,qword ptr [rax+rsi] + mov ecx,20h + mov edx,FFFFFFFFh + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "SimpleStackArgs") +{ + ScopedRegX64 tmp{regs, regs.takeReg(rax)}; + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.addArgument(SizeX64::qword, qword[r14 + 16]); + callWrap.addArgument(SizeX64::qword, qword[r14 + 32]); + callWrap.addArgument(SizeX64::qword, qword[r14 + 48]); + callWrap.addArgument(SizeX64::dword, 1); + callWrap.addArgument(SizeX64::qword, qword[r13]); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rdx,qword ptr [r13] + mov qword ptr [rsp+028h],rdx + mov rcx,rax + mov rdx,qword ptr [r14+010h] + mov r8,qword ptr [r14+020h] + mov r9,qword ptr [r14+030h] + mov dword ptr [rsp+020h],1 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "FixedRegisters") +{ + callWrap.addArgument(SizeX64::dword, 1); + callWrap.addArgument(SizeX64::qword, 2); + callWrap.addArgument(SizeX64::qword, 3); + callWrap.addArgument(SizeX64::qword, 4); + callWrap.addArgument(SizeX64::qword, r14); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov qword ptr [rsp+020h],r14 + mov ecx,1 + mov rdx,2 + mov r8,3 + mov r9,4 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "EasyInterference") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rdi)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rsi)}; + ScopedRegX64 tmp3{regs, regs.takeReg(rArg2)}; + ScopedRegX64 tmp4{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, tmp1); + callWrap.addArgument(SizeX64::qword, tmp2); + 
callWrap.addArgument(SizeX64::qword, tmp3); + callWrap.addArgument(SizeX64::qword, tmp4); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov r8,rdx + mov rdx,rsi + mov r9,rcx + mov rcx,rdi + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "FakeInterference") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.release() + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp2.release() + 8]); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rcx,qword ptr [rcx+8] + mov rdx,qword ptr [rdx+8] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardInterferenceInt") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg4)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg3)}; + ScopedRegX64 tmp3{regs, regs.takeReg(rArg2)}; + ScopedRegX64 tmp4{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, tmp1); + callWrap.addArgument(SizeX64::qword, tmp2); + callWrap.addArgument(SizeX64::qword, tmp3); + callWrap.addArgument(SizeX64::qword, tmp4); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,r9 + mov r9,rcx + mov rcx,rax + mov rax,r8 + mov r8,rdx + mov rdx,rax + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardInterferenceInt2") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg4d)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg3d)}; + ScopedRegX64 tmp3{regs, regs.takeReg(rArg2d)}; + ScopedRegX64 tmp4{regs, regs.takeReg(rArg1d)}; + callWrap.addArgument(SizeX64::dword, tmp1); + callWrap.addArgument(SizeX64::dword, tmp2); + callWrap.addArgument(SizeX64::dword, tmp3); + callWrap.addArgument(SizeX64::dword, tmp4); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov eax,r9d + mov r9d,ecx + mov ecx,eax + mov eax,r8d + mov r8d,edx + mov edx,eax + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardInterferenceFp") +{ + ScopedRegX64 tmp1{regs, 
regs.takeReg(xmm1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(xmm0)}; + callWrap.addArgument(SizeX64::xmmword, tmp1); + callWrap.addArgument(SizeX64::xmmword, tmp2); + callWrap.call(qword[r12]); + + checkMatch(R"( + vmovsd xmm2,xmm1,xmm1 + vmovsd xmm1,xmm0,xmm0 + vmovsd xmm0,xmm2,xmm2 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardInterferenceBoth") +{ + ScopedRegX64 int1{regs, regs.takeReg(rArg2)}; + ScopedRegX64 int2{regs, regs.takeReg(rArg1)}; + ScopedRegX64 fp1{regs, regs.takeReg(xmm3)}; + ScopedRegX64 fp2{regs, regs.takeReg(xmm2)}; + callWrap.addArgument(SizeX64::qword, int1); + callWrap.addArgument(SizeX64::qword, int2); + callWrap.addArgument(SizeX64::xmmword, fp1); + callWrap.addArgument(SizeX64::xmmword, fp2); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,rdx + mov rdx,rcx + mov rcx,rax + vmovsd xmm0,xmm3,xmm3 + vmovsd xmm3,xmm2,xmm2 + vmovsd xmm2,xmm0,xmm0 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "FakeMultiuseInterferenceMem") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp2.reg + 16]); + tmp1.release(); + tmp2.release(); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rcx,qword ptr [rcx+rdx+8] + mov rdx,qword ptr [rdx+010h] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardMultiuseInterferenceMem1") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + 16]); + tmp1.release(); + tmp2.release(); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,rcx + mov rcx,qword ptr [rax+rdx+8] + mov rdx,qword ptr [rax+010h] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, 
"HardMultiuseInterferenceMem2") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 16]); + tmp1.release(); + tmp2.release(); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,rcx + mov rcx,qword ptr [rax+rdx+8] + mov rdx,qword ptr [rax+rdx+010h] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "HardMultiuseInterferenceMem3") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg3)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + ScopedRegX64 tmp3{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + tmp2.reg + 8]); + callWrap.addArgument(SizeX64::qword, qword[tmp2.reg + tmp3.reg + 16]); + callWrap.addArgument(SizeX64::qword, qword[tmp3.reg + tmp1.reg + 16]); + tmp1.release(); + tmp2.release(); + tmp3.release(); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rax,r8 + mov r8,qword ptr [rcx+rax+010h] + mov rbx,rdx + mov rdx,qword ptr [rbx+rcx+010h] + mov rcx,qword ptr [rax+rbx+8] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "InterferenceWithCallArg1") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + callWrap.addArgument(SizeX64::qword, qword[tmp1.reg + 8]); + callWrap.call(qword[tmp1.release() + 16]); + + checkMatch(R"( + mov rax,rcx + mov rcx,qword ptr [rax+8] + call qword ptr [rax+010h] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "InterferenceWithCallArg2") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, tmp2); + callWrap.call(qword[tmp1.release() + 16]); + + checkMatch(R"( + mov rax,rcx + mov rcx,rdx + call qword ptr [rax+010h] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "InterferenceWithCallArg3") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + 
callWrap.addArgument(SizeX64::qword, tmp1.reg); + callWrap.call(qword[tmp1.release() + 16]); + + checkMatch(R"( + call qword ptr [rcx+010h] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "WithLastIrInstUse1") +{ + IrInst irInst1; + IrOp irOp1 = {IrOpKind::Inst, 0}; + irInst1.regX64 = regs.takeReg(xmm0); + irInst1.lastUse = 1; + function.instructions.push_back(irInst1); + callWrap.instIdx = irInst1.lastUse; + + callWrap.addArgument(SizeX64::xmmword, irInst1.regX64, irOp1); // Already in its place + callWrap.addArgument(SizeX64::xmmword, qword[r12 + 8]); + callWrap.call(qword[r12]); + + checkMatch(R"( + vmovsd xmm1,qword ptr [r12+8] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "WithLastIrInstUse2") +{ + IrInst irInst1; + IrOp irOp1 = {IrOpKind::Inst, 0}; + irInst1.regX64 = regs.takeReg(xmm0); + irInst1.lastUse = 1; + function.instructions.push_back(irInst1); + callWrap.instIdx = irInst1.lastUse; + + callWrap.addArgument(SizeX64::xmmword, qword[r12 + 8]); + callWrap.addArgument(SizeX64::xmmword, irInst1.regX64, irOp1); + callWrap.call(qword[r12]); + + checkMatch(R"( + vmovsd xmm1,xmm0,xmm0 + vmovsd xmm0,qword ptr [r12+8] + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "WithLastIrInstUse3") +{ + IrInst irInst1; + IrOp irOp1 = {IrOpKind::Inst, 0}; + irInst1.regX64 = regs.takeReg(xmm0); + irInst1.lastUse = 1; + function.instructions.push_back(irInst1); + callWrap.instIdx = irInst1.lastUse; + + callWrap.addArgument(SizeX64::xmmword, irInst1.regX64, irOp1); + callWrap.addArgument(SizeX64::xmmword, irInst1.regX64, irOp1); + callWrap.call(qword[r12]); + + checkMatch(R"( + vmovsd xmm1,xmm0,xmm0 + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "WithLastIrInstUse4") +{ + IrInst irInst1; + IrOp irOp1 = {IrOpKind::Inst, 0}; + irInst1.regX64 = regs.takeReg(rax); + irInst1.lastUse = 1; + function.instructions.push_back(irInst1); + callWrap.instIdx = irInst1.lastUse; + + ScopedRegX64 
tmp{regs, regs.takeReg(rdx)}; + callWrap.addArgument(SizeX64::qword, r15); + callWrap.addArgument(SizeX64::qword, irInst1.regX64, irOp1); + callWrap.addArgument(SizeX64::qword, tmp); + callWrap.call(qword[r12]); + + checkMatch(R"( + mov rcx,r15 + mov r8,rdx + mov rdx,rax + call qword ptr [r12] +)"); +} + +TEST_CASE_FIXTURE(IrCallWrapperX64Fixture, "ExtraCoverage") +{ + ScopedRegX64 tmp1{regs, regs.takeReg(rArg1)}; + ScopedRegX64 tmp2{regs, regs.takeReg(rArg2)}; + callWrap.addArgument(SizeX64::qword, addr[r12 + 8]); + callWrap.addArgument(SizeX64::qword, addr[r12 + 16]); + callWrap.addArgument(SizeX64::xmmword, xmmword[r13]); + callWrap.call(qword[tmp1.release() + tmp2.release()]); + + checkMatch(R"( + vmovups xmm2,xmmword ptr [r13] + mov rax,rcx + lea rcx,none ptr [r12+8] + mov rbx,rdx + lea rdx,none ptr [r12+010h] + call qword ptr [rax+rbx] +)"); +} + +TEST_SUITE_END(); diff --git a/tests/Lexer.test.cpp b/tests/Lexer.test.cpp index 7fcc1e54..78d1389a 100644 --- a/tests/Lexer.test.cpp +++ b/tests/Lexer.test.cpp @@ -157,8 +157,6 @@ TEST_CASE("string_interpolation_basic") TEST_CASE("string_interpolation_full") { - ScopedFastFlag sff("LuauFixInterpStringMid", true); - const std::string testInput = R"(`foo {"bar"} {"baz"} end`)"; Luau::Allocator alloc; AstNameTable table(alloc); diff --git a/tests/Linter.test.cpp b/tests/Linter.test.cpp index 0f134616..8bef5922 100644 --- a/tests/Linter.test.cpp +++ b/tests/Linter.test.cpp @@ -1444,8 +1444,6 @@ TEST_CASE_FIXTURE(Fixture, "LintHygieneUAF") TEST_CASE_FIXTURE(BuiltinsFixture, "DeprecatedApiTyped") { - ScopedFastFlag sff("LuauImproveDeprecatedApiLint", true); - unfreeze(frontend.globals.globalTypes); TypeId instanceType = frontend.globals.globalTypes.addType(ClassType{"Instance", {}, std::nullopt, std::nullopt, {}, {}, "Test"}); persist(instanceType); @@ -1496,8 +1494,6 @@ end TEST_CASE_FIXTURE(BuiltinsFixture, "DeprecatedApiUntyped") { - ScopedFastFlag sff("LuauImproveDeprecatedApiLint", true); - if (TableType* ttv = 
getMutable(getGlobalBinding(frontend.globals, "table"))) { ttv->props["foreach"].deprecated = true; diff --git a/tests/Normalize.test.cpp b/tests/Normalize.test.cpp index a495ee23..4378bab8 100644 --- a/tests/Normalize.test.cpp +++ b/tests/Normalize.test.cpp @@ -470,7 +470,6 @@ TEST_SUITE_END(); struct NormalizeFixture : Fixture { - ScopedFastFlag sff1{"LuauNegatedFunctionTypes", true}; ScopedFastFlag sff2{"LuauNegatedClassTypes", true}; TypeArena arena; diff --git a/tests/Parser.test.cpp b/tests/Parser.test.cpp index 9ff16d16..ef5aabbe 100644 --- a/tests/Parser.test.cpp +++ b/tests/Parser.test.cpp @@ -1040,8 +1040,6 @@ TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_call_without_parens") TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_without_expression") { - ScopedFastFlag sff("LuauFixInterpStringMid", true); - try { parse(R"( diff --git a/tests/TypeInfer.aliases.test.cpp b/tests/TypeInfer.aliases.test.cpp index 022abea0..52de15c7 100644 --- a/tests/TypeInfer.aliases.test.cpp +++ b/tests/TypeInfer.aliases.test.cpp @@ -1014,4 +1014,34 @@ TEST_CASE_FIXTURE(Fixture, "another_thing_from_roact") LUAU_REQUIRE_NO_ERRORS(result); } +/* + * It is sometimes possible for type alias resolution to produce a TypeId that + * belongs to a different module. + * + * We must not mutate any fields of the resulting type when this happens. The + * memory has been frozen. 
+ */ +TEST_CASE_FIXTURE(BuiltinsFixture, "alias_expands_to_bare_reference_to_imported_type") +{ + fileResolver.source["game/A"] = R"( + --!strict + export type Object = {[string]: any} + return {} + )"; + + fileResolver.source["game/B"] = R"( + local A = require(script.Parent.A) + + type Object = A.Object + type ReadOnly = T + + local function f(): ReadOnly + return nil :: any + end + )"; + + CheckResult result = frontend.check("game/B"); + LUAU_REQUIRE_NO_ERRORS(result); +} + TEST_SUITE_END(); diff --git a/tests/TypeInfer.functions.test.cpp b/tests/TypeInfer.functions.test.cpp index c7f9684b..f1d42c6a 100644 --- a/tests/TypeInfer.functions.test.cpp +++ b/tests/TypeInfer.functions.test.cpp @@ -1784,7 +1784,6 @@ z = y -- Not OK, so the line is colorable TEST_CASE_FIXTURE(Fixture, "function_is_supertype_of_concrete_functions") { - ScopedFastFlag sff{"LuauNegatedFunctionTypes", true}; registerHiddenTypes(&frontend); CheckResult result = check(R"( @@ -1803,7 +1802,6 @@ TEST_CASE_FIXTURE(Fixture, "function_is_supertype_of_concrete_functions") TEST_CASE_FIXTURE(Fixture, "concrete_functions_are_not_supertypes_of_function") { - ScopedFastFlag sff{"LuauNegatedFunctionTypes", true}; registerHiddenTypes(&frontend); CheckResult result = check(R"( @@ -1824,7 +1822,6 @@ TEST_CASE_FIXTURE(Fixture, "concrete_functions_are_not_supertypes_of_function") TEST_CASE_FIXTURE(Fixture, "other_things_are_not_related_to_function") { - ScopedFastFlag sff{"LuauNegatedFunctionTypes", true}; registerHiddenTypes(&frontend); CheckResult result = check(R"( diff --git a/tests/TypeInfer.loops.test.cpp b/tests/TypeInfer.loops.test.cpp index 511cbc76..7a134358 100644 --- a/tests/TypeInfer.loops.test.cpp +++ b/tests/TypeInfer.loops.test.cpp @@ -707,4 +707,26 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "cli_68448_iterators_need_not_accept_nil") CHECK(toString(requireType("makeEnum"), {true}) == "({a}) -> {| [a]: a |}"); } +TEST_CASE_FIXTURE(Fixture, "iterate_over_free_table") +{ + CheckResult result = 
check(R"( + function print(x) end + + function dump(tbl) + print(tbl.whatever) + for k, v in tbl do + print(k) + print(v) + end + end + )"); + + LUAU_REQUIRE_ERROR_COUNT(1, result); + + GenericError* ge = get(result.errors[0]); + REQUIRE(ge); + + CHECK("Cannot iterate over a table without indexer" == ge->message); +} + TEST_SUITE_END(); diff --git a/tests/TypeInfer.oop.test.cpp b/tests/TypeInfer.oop.test.cpp index eb4937fd..f2b3d055 100644 --- a/tests/TypeInfer.oop.test.cpp +++ b/tests/TypeInfer.oop.test.cpp @@ -381,4 +381,29 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "react_style_oo") CHECK("string" == toString(requireType("hello"))); } +TEST_CASE_FIXTURE(BuiltinsFixture, "cycle_between_object_constructor_and_alias") +{ + CheckResult result = check(R"( + local T = {} + T.__index = T + + function T.new(): T + return setmetatable({}, T) + end + + export type T = typeof(T.new()) + + return T + )"); + + LUAU_REQUIRE_NO_ERRORS(result); + + auto module = getMainModule(); + + REQUIRE(module->exportedTypeBindings.count("T")); + + TypeId aliasType = module->exportedTypeBindings["T"].type; + CHECK_MESSAGE(get(follow(aliasType)), "Expected metatable type but got: " << toString(aliasType)); +} + TEST_SUITE_END(); diff --git a/tests/TypeInfer.operators.test.cpp b/tests/TypeInfer.operators.test.cpp index 8c289c7b..174bc310 100644 --- a/tests/TypeInfer.operators.test.cpp +++ b/tests/TypeInfer.operators.test.cpp @@ -860,8 +860,6 @@ TEST_CASE_FIXTURE(Fixture, "operator_eq_operands_are_not_subtypes_of_each_other_ TEST_CASE_FIXTURE(Fixture, "operator_eq_completely_incompatible") { - ScopedFastFlag sff{"LuauIntersectionTestForEquality", true}; - CheckResult result = check(R"( local a: string | number = "hi" local b: {x: string}? 
= {x = "bye"} @@ -970,8 +968,6 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "expected_types_through_binary_or") TEST_CASE_FIXTURE(ClassFixture, "unrelated_classes_cannot_be_compared") { - ScopedFastFlag sff{"LuauIntersectionTestForEquality", true}; - CheckResult result = check(R"( local a = BaseClass.New() local b = UnrelatedClass.New() @@ -984,8 +980,6 @@ TEST_CASE_FIXTURE(ClassFixture, "unrelated_classes_cannot_be_compared") TEST_CASE_FIXTURE(Fixture, "unrelated_primitives_cannot_be_compared") { - ScopedFastFlag sff{"LuauIntersectionTestForEquality", true}; - CheckResult result = check(R"( local c = 5 == true )"); diff --git a/tests/TypeInfer.provisional.test.cpp b/tests/TypeInfer.provisional.test.cpp index 30f77d68..38e7e2f3 100644 --- a/tests/TypeInfer.provisional.test.cpp +++ b/tests/TypeInfer.provisional.test.cpp @@ -176,8 +176,6 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "error_on_eq_metamethod_returning_a_type_othe // We need refine both operands as `never` in the `==` branch. TEST_CASE_FIXTURE(Fixture, "lvalue_equals_another_lvalue_with_no_overlap") { - ScopedFastFlag sff{"LuauIntersectionTestForEquality", true}; - CheckResult result = check(R"( local function f(a: string, b: boolean?) 
if a == b then diff --git a/tests/TypeInfer.tables.test.cpp b/tests/TypeInfer.tables.test.cpp index 21ac6421..468adc2c 100644 --- a/tests/TypeInfer.tables.test.cpp +++ b/tests/TypeInfer.tables.test.cpp @@ -18,7 +18,6 @@ LUAU_FASTFLAG(LuauLowerBoundsCalculation); LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution); LUAU_FASTFLAG(LuauInstantiateInSubtyping) LUAU_FASTFLAG(LuauTypeMismatchInvarianceInError) -LUAU_FASTFLAG(LuauDontExtendUnsealedRValueTables) TEST_SUITE_BEGIN("TableTests"); @@ -913,10 +912,7 @@ TEST_CASE_FIXTURE(Fixture, "disallow_indexing_into_an_unsealed_table_with_no_ind local k1 = getConstant("key1") )"); - if (FFlag::LuauDontExtendUnsealedRValueTables) - CHECK("any" == toString(requireType("k1"))); - else - CHECK("a" == toString(requireType("k1"))); + CHECK("any" == toString(requireType("k1"))); LUAU_REQUIRE_NO_ERRORS(result); } @@ -3542,8 +3538,6 @@ _ = {_,} TEST_CASE_FIXTURE(Fixture, "when_augmenting_an_unsealed_table_with_an_indexer_apply_the_correct_scope_to_the_indexer_type") { - ScopedFastFlag sff{"LuauDontExtendUnsealedRValueTables", true}; - CheckResult result = check(R"( local events = {} local mockObserveEvent = function(_, key, callback) @@ -3572,8 +3566,6 @@ TEST_CASE_FIXTURE(Fixture, "when_augmenting_an_unsealed_table_with_an_indexer_ap TEST_CASE_FIXTURE(Fixture, "dont_extend_unsealed_tables_in_rvalue_position") { - ScopedFastFlag sff{"LuauDontExtendUnsealedRValueTables", true}; - CheckResult result = check(R"( local testDictionary = { FruitName = "Lemon", diff --git a/tests/TypeInfer.test.cpp b/tests/TypeInfer.test.cpp index 7c4bfb2e..7e317f2e 100644 --- a/tests/TypeInfer.test.cpp +++ b/tests/TypeInfer.test.cpp @@ -1194,7 +1194,6 @@ TEST_CASE_FIXTURE(Fixture, "dcr_delays_expansion_of_function_containing_blocked_ { ScopedFastFlag sff[] = { {"DebugLuauDeferredConstraintResolution", true}, - {"LuauTinyUnifyNormalsFix", true}, // If we run this with error-suppression, it triggers an assertion. 
// FATAL ERROR: Assertion failed: !"Internal error: Trying to normalize a BlockedType" {"LuauTransitiveSubtyping", false}, diff --git a/tools/faillist.txt b/tools/faillist.txt index 76e5972d..31fc82da 100644 --- a/tools/faillist.txt +++ b/tools/faillist.txt @@ -25,9 +25,6 @@ BuiltinTests.string_format_correctly_ordered_types BuiltinTests.string_format_report_all_type_errors_at_correct_positions BuiltinTests.string_format_tostring_specifier_type_constraint BuiltinTests.string_format_use_correct_argument2 -BuiltinTests.table_pack -BuiltinTests.table_pack_reduce -BuiltinTests.table_pack_variadic DefinitionTests.class_definition_overload_metamethods DefinitionTests.class_definition_string_props GenericsTests.apply_type_function_nested_generics2 @@ -114,7 +111,6 @@ TableTests.table_subtyping_with_missing_props_dont_report_multiple_errors TableTests.table_unification_4 TableTests.used_colon_instead_of_dot TableTests.used_dot_instead_of_colon -ToString.named_metatable_toStringNamedFunction ToString.toStringDetailed2 ToString.toStringErrorPack ToString.toStringNamedFunction_generic_pack @@ -137,6 +133,7 @@ TypeInfer.check_type_infer_recursion_count TypeInfer.cli_50041_committing_txnlog_in_apollo_client_error TypeInfer.dont_report_type_errors_within_an_AstExprError TypeInfer.dont_report_type_errors_within_an_AstStatError +TypeInfer.follow_on_new_types_in_substitution TypeInfer.fuzz_free_table_type_change_during_index_check TypeInfer.infer_assignment_value_types_mutable_lval TypeInfer.no_stack_overflow_from_isoptional diff --git a/tools/natvis/CodeGen.natvis b/tools/natvis/CodeGen.natvis index 5ff6e143..84fb3329 100644 --- a/tools/natvis/CodeGen.natvis +++ b/tools/natvis/CodeGen.natvis @@ -1,45 +1,46 @@ - - noreg - rip + + noreg + rip - al - cl - dl - bl + al + cl + dl + bl - eax - ecx - edx - ebx - esp - ebp - esi - edi - e{(int)index,d}d + eax + ecx + edx + ebx + esp + ebp + esi + edi + e{(int)index,d}d - rax - rcx - rdx - rbx - rsp - rbp - rsi - rdi - r{(int)index,d} + 
rax + rcx + rdx + rbx + rsp + rbp + rsi + rdi + r{(int)index,d} - xmm{(int)index,d} + xmm{(int)index,d} - ymm{(int)index,d} + ymm{(int)index,d} - + {base} {memSize,en} ptr[{base} + {index}*{(int)scale,d} + {imm}] {memSize,en} ptr[{index}*{(int)scale,d} + {imm}] {memSize,en} ptr[{base} + {imm}] + {memSize,en} ptr[{base} + {imm}] {memSize,en} ptr[{imm}] {imm}