From d141a5c48d7ffda9e992d10ba24d9f6ad242eda9 Mon Sep 17 00:00:00 2001 From: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com> Date: Fri, 14 Apr 2023 21:06:22 +0300 Subject: [PATCH] Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info --- Analysis/include/Luau/AstQuery.h | 3 + Analysis/include/Luau/Frontend.h | 20 +- Analysis/include/Luau/Module.h | 1 + Analysis/include/Luau/Type.h | 14 + 
Analysis/include/Luau/Unifier.h | 4 +- Analysis/src/AstQuery.cpp | 31 +- Analysis/src/ConstraintSolver.cpp | 18 +- Analysis/src/Frontend.cpp | 154 +++-- Analysis/src/Module.cpp | 23 +- Analysis/src/Type.cpp | 9 + Analysis/src/TypeChecker2.cpp | 66 +- Analysis/src/TypeInfer.cpp | 64 +- Analysis/src/Unifier.cpp | 86 ++- Ast/src/StringUtils.cpp | 8 +- CodeGen/include/Luau/AddressA64.h | 4 +- CodeGen/include/Luau/AssemblyBuilderA64.h | 17 +- CodeGen/include/Luau/IrCallWrapperX64.h | 7 +- CodeGen/include/Luau/IrData.h | 38 +- CodeGen/include/Luau/IrRegAllocX64.h | 23 +- CodeGen/include/Luau/IrUtils.h | 2 +- CodeGen/include/Luau/RegisterA64.h | 12 + CodeGen/include/Luau/RegisterX64.h | 12 + CodeGen/include/Luau/UnwindBuilder.h | 12 +- CodeGen/include/Luau/UnwindBuilderDwarf2.h | 22 +- CodeGen/include/Luau/UnwindBuilderWin.h | 38 +- CodeGen/src/AssemblyBuilderA64.cpp | 87 ++- CodeGen/src/AssemblyBuilderX64.cpp | 11 +- CodeGen/src/BitUtils.h | 36 + CodeGen/src/CodeBlockUnwind.cpp | 52 +- CodeGen/src/CodeGen.cpp | 26 +- CodeGen/src/CodeGenA64.cpp | 132 +++- CodeGen/src/CodeGenA64.h | 2 +- CodeGen/src/CodeGenUtils.cpp | 50 +- CodeGen/src/CodeGenUtils.h | 1 + CodeGen/src/CodeGenX64.cpp | 50 +- CodeGen/src/CodeGenX64.h | 2 +- CodeGen/src/EmitBuiltinsX64.cpp | 78 +-- CodeGen/src/EmitCommon.h | 4 +- CodeGen/src/EmitCommonA64.cpp | 130 ---- CodeGen/src/EmitCommonA64.h | 19 +- CodeGen/src/EmitCommonX64.cpp | 75 +-- CodeGen/src/EmitCommonX64.h | 41 +- CodeGen/src/EmitInstructionA64.cpp | 74 --- CodeGen/src/EmitInstructionA64.h | 24 - CodeGen/src/EmitInstructionX64.cpp | 74 +-- CodeGen/src/EmitInstructionX64.h | 6 +- CodeGen/src/Fallbacks.cpp | 38 ++ CodeGen/src/Fallbacks.h | 1 + CodeGen/src/IrAnalysis.cpp | 2 + CodeGen/src/IrBuilder.cpp | 3 +- CodeGen/src/IrCallWrapperX64.cpp | 83 +-- CodeGen/src/IrLoweringA64.cpp | 733 ++++++++++++++------- CodeGen/src/IrLoweringA64.h | 4 +- CodeGen/src/IrLoweringX64.cpp | 239 ++++--- CodeGen/src/IrRegAllocA64.cpp | 21 +- 
CodeGen/src/IrRegAllocX64.cpp | 305 +++++---- CodeGen/src/IrTranslateBuiltins.cpp | 70 +- CodeGen/src/IrUtils.cpp | 4 +- CodeGen/src/NativeState.cpp | 26 +- CodeGen/src/NativeState.h | 13 +- CodeGen/src/OptimizeConstProp.cpp | 2 + CodeGen/src/UnwindBuilderDwarf2.cpp | 47 +- CodeGen/src/UnwindBuilderWin.cpp | 112 +++- Sources.cmake | 4 +- VM/src/lapi.cpp | 2 + VM/src/ltable.cpp | 36 +- fuzz/linter.cpp | 2 +- fuzz/proto.cpp | 8 +- fuzz/typeck.cpp | 2 +- tests/AssemblyBuilderA64.test.cpp | 26 +- tests/AssemblyBuilderX64.test.cpp | 10 + tests/Autocomplete.test.cpp | 30 + tests/CodeAllocator.test.cpp | 190 +++++- tests/Conformance.test.cpp | 43 +- tests/Fixture.cpp | 10 +- tests/Module.test.cpp | 39 +- tests/StringUtils.test.cpp | 18 + tests/TypeInfer.annotations.test.cpp | 13 +- tests/TypeInfer.functions.test.cpp | 33 + tests/TypeInfer.operators.test.cpp | 78 ++- tests/TypeInfer.provisional.test.cpp | 36 +- tests/TypeInfer.test.cpp | 15 + tests/TypeInfer.unionTypes.test.cpp | 16 + tests/TypeInfer.unknownnever.test.cpp | 5 - tests/TypeVar.test.cpp | 12 +- tests/conformance/math.lua | 10 + tests/conformance/tables.lua | 7 + tools/lvmexecute_split.py | 2 +- 88 files changed, 2579 insertions(+), 1433 deletions(-) create mode 100644 CodeGen/src/BitUtils.h delete mode 100644 CodeGen/src/EmitCommonA64.cpp delete mode 100644 CodeGen/src/EmitInstructionA64.cpp delete mode 100644 CodeGen/src/EmitInstructionA64.h diff --git a/Analysis/include/Luau/AstQuery.h b/Analysis/include/Luau/AstQuery.h index aa7ef8d3..e7a018c0 100644 --- a/Analysis/include/Luau/AstQuery.h +++ b/Analysis/include/Luau/AstQuery.h @@ -64,8 +64,11 @@ private: }; std::vector findAncestryAtPositionForAutocomplete(const SourceModule& source, Position pos); +std::vector findAncestryAtPositionForAutocomplete(AstStatBlock* root, Position pos); std::vector findAstAncestryOfPosition(const SourceModule& source, Position pos, bool includeTypes = false); +std::vector findAstAncestryOfPosition(AstStatBlock* root, Position 
pos, bool includeTypes = false); AstNode* findNodeAtPosition(const SourceModule& source, Position pos); +AstNode* findNodeAtPosition(AstStatBlock* root, Position pos); AstExpr* findExprAtPosition(const SourceModule& source, Position pos); ScopePtr findScopeAtPosition(const Module& module, Position pos); std::optional findBindingAtPosition(const Module& module, const SourceModule& source, Position pos); diff --git a/Analysis/include/Luau/Frontend.h b/Analysis/include/Luau/Frontend.h index 82251378..3f41c145 100644 --- a/Analysis/include/Luau/Frontend.h +++ b/Analysis/include/Luau/Frontend.h @@ -165,7 +165,15 @@ struct Frontend bool captureComments, bool typeCheckForAutocomplete = false); private: - ModulePtr check(const SourceModule& sourceModule, Mode mode, std::vector requireCycles, bool forAutocomplete = false, bool recordJsonLog = false); + struct TypeCheckLimits + { + std::optional finishTime; + std::optional instantiationChildLimit; + std::optional unifierIterationLimit; + }; + + ModulePtr check(const SourceModule& sourceModule, Mode mode, std::vector requireCycles, std::optional environmentScope, + bool forAutocomplete, bool recordJsonLog, TypeCheckLimits typeCheckLimits); std::pair getSourceNode(const ModuleName& name); SourceModule parse(const ModuleName& name, std::string_view src, const ParseOptions& parseOptions); @@ -185,15 +193,21 @@ public: const NotNull builtinTypes; FileResolver* fileResolver; + FrontendModuleResolver moduleResolver; FrontendModuleResolver moduleResolverForAutocomplete; + GlobalTypes globals; GlobalTypes globalsForAutocomplete; - TypeChecker typeChecker; - TypeChecker typeCheckerForAutocomplete; + + // TODO: remove with FFlagLuauOnDemandTypecheckers + TypeChecker typeChecker_DEPRECATED; + TypeChecker typeCheckerForAutocomplete_DEPRECATED; + ConfigResolver* configResolver; FrontendOptions options; InternalErrorReporter iceHandler; + std::function prepareModuleScope; std::unordered_map sourceNodes; std::unordered_map sourceModules; 
diff --git a/Analysis/include/Luau/Module.h b/Analysis/include/Luau/Module.h index 72f87601..1bca7636 100644 --- a/Analysis/include/Luau/Module.h +++ b/Analysis/include/Luau/Module.h @@ -51,6 +51,7 @@ struct SourceModule }; bool isWithinComment(const SourceModule& sourceModule, Position pos); +bool isWithinComment(const ParseResult& result, Position pos); struct RequireCycle { diff --git a/Analysis/include/Luau/Type.h b/Analysis/include/Luau/Type.h index cff86df4..b9544a11 100644 --- a/Analysis/include/Luau/Type.h +++ b/Analysis/include/Luau/Type.h @@ -738,6 +738,7 @@ const T* get(TypeId tv) return get_if(&tv->ty); } + template T* getMutable(TypeId tv) { @@ -897,6 +898,19 @@ bool hasTag(TypeId ty, const std::string& tagName); bool hasTag(const Property& prop, const std::string& tagName); bool hasTag(const Tags& tags, const std::string& tagName); // Do not use in new work. +template +bool hasTypeInIntersection(TypeId ty) +{ + TypeId tf = follow(ty); + if (get(tf)) + return true; + for (auto t : flattenIntersection(tf)) + if (get(follow(t))) + return true; + return false; +} + +bool hasPrimitiveTypeInIntersection(TypeId ty, PrimitiveType::Type primTy); /* * Use this to change the kind of a particular type. 
* diff --git a/Analysis/include/Luau/Unifier.h b/Analysis/include/Luau/Unifier.h index e7817e57..e3b0a878 100644 --- a/Analysis/include/Luau/Unifier.h +++ b/Analysis/include/Luau/Unifier.h @@ -137,9 +137,9 @@ private: public: // Returns true if the type "needle" already occurs within "haystack" and reports an "infinite type error" - bool occursCheck(TypeId needle, TypeId haystack); + bool occursCheck(TypeId needle, TypeId haystack, bool reversed); bool occursCheck(DenseHashSet& seen, TypeId needle, TypeId haystack); - bool occursCheck(TypePackId needle, TypePackId haystack); + bool occursCheck(TypePackId needle, TypePackId haystack, bool reversed); bool occursCheck(DenseHashSet& seen, TypePackId needle, TypePackId haystack); Unifier makeChildUnifier(); diff --git a/Analysis/src/AstQuery.cpp b/Analysis/src/AstQuery.cpp index dc07a35c..cb3efe6a 100644 --- a/Analysis/src/AstQuery.cpp +++ b/Analysis/src/AstQuery.cpp @@ -211,33 +211,48 @@ struct FindFullAncestry final : public AstVisitor std::vector findAncestryAtPositionForAutocomplete(const SourceModule& source, Position pos) { - AutocompleteNodeFinder finder{pos, source.root}; - source.root->visit(&finder); + return findAncestryAtPositionForAutocomplete(source.root, pos); +} + +std::vector findAncestryAtPositionForAutocomplete(AstStatBlock* root, Position pos) +{ + AutocompleteNodeFinder finder{pos, root}; + root->visit(&finder); return finder.ancestry; } std::vector findAstAncestryOfPosition(const SourceModule& source, Position pos, bool includeTypes) { - const Position end = source.root->location.end; + return findAstAncestryOfPosition(source.root, pos, includeTypes); +} + +std::vector findAstAncestryOfPosition(AstStatBlock* root, Position pos, bool includeTypes) +{ + const Position end = root->location.end; if (pos > end) pos = end; FindFullAncestry finder(pos, end, includeTypes); - source.root->visit(&finder); + root->visit(&finder); return finder.nodes; } AstNode* findNodeAtPosition(const SourceModule& source, 
Position pos) { - const Position end = source.root->location.end; - if (pos < source.root->location.begin) - return source.root; + return findNodeAtPosition(source.root, pos); +} + +AstNode* findNodeAtPosition(AstStatBlock* root, Position pos) +{ + const Position end = root->location.end; + if (pos < root->location.begin) + return root; if (pos > end) pos = end; FindNode findNode{pos, end}; - findNode.visit(source.root); + findNode.visit(root); return findNode.best; } diff --git a/Analysis/src/ConstraintSolver.cpp b/Analysis/src/ConstraintSolver.cpp index d2bed2da..0fc32c33 100644 --- a/Analysis/src/ConstraintSolver.cpp +++ b/Analysis/src/ConstraintSolver.cpp @@ -595,6 +595,11 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull(leftType) || get(leftType); + bool rightAny = get(rightType) || get(rightType); + bool anyPresent = leftAny || rightAny; + if (isBlocked(leftType) && leftType != resultType) return block(c.leftType, constraint); @@ -604,12 +609,12 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull(leftType) && !isLogical) + if (hasTypeInIntersection(leftType) && !isLogical) return block(leftType, constraint); } // Logical expressions may proceed if the LHS is free. - if (isBlocked(leftType) || (get(leftType) && !isLogical)) + if (isBlocked(leftType) || (hasTypeInIntersection(leftType) && !isLogical)) { asMutable(resultType)->ty.emplace(errorRecoveryType()); unblock(resultType); @@ -696,11 +701,6 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull(leftType) || get(leftType); - bool rightAny = get(rightType) || get(rightType); - bool anyPresent = leftAny || rightAny; - switch (c.op) { // For arithmetic operators, if the LHS is a number, the RHS must be a @@ -711,6 +711,8 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull(leftType) && force) + asMutable(leftType)->ty.emplace(anyPresent ? 
builtinTypes->anyType : builtinTypes->numberType); if (isNumber(leftType)) { unify(leftType, rightType, constraint->scope); @@ -723,6 +725,8 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull(leftType) && force) + asMutable(leftType)->ty.emplace(anyPresent ? builtinTypes->anyType : builtinTypes->stringType); if (isString(leftType)) { unify(leftType, rightType, constraint->scope); diff --git a/Analysis/src/Frontend.cpp b/Analysis/src/Frontend.cpp index 98022d86..5beb6c4e 100644 --- a/Analysis/src/Frontend.cpp +++ b/Analysis/src/Frontend.cpp @@ -31,7 +31,8 @@ LUAU_FASTFLAG(LuauInferInNoCheckMode) LUAU_FASTFLAGVARIABLE(LuauKnowsTheDataModel3, false) LUAU_FASTINTVARIABLE(LuauAutocompleteCheckTimeoutMs, 100) LUAU_FASTFLAGVARIABLE(DebugLuauDeferredConstraintResolution, false) -LUAU_FASTFLAGVARIABLE(DebugLuauLogSolverToJson, false); +LUAU_FASTFLAGVARIABLE(DebugLuauLogSolverToJson, false) +LUAU_FASTFLAGVARIABLE(LuauOnDemandTypecheckers, false) namespace Luau { @@ -131,8 +132,8 @@ static void persistCheckedTypes(ModulePtr checkedModule, GlobalTypes& globals, S LoadDefinitionFileResult Frontend::loadDefinitionFile(GlobalTypes& globals, ScopePtr targetScope, std::string_view source, const std::string& packageName, bool captureComments, bool typeCheckForAutocomplete) { - if (!FFlag::DebugLuauDeferredConstraintResolution) - return Luau::loadDefinitionFileNoDCR(typeCheckForAutocomplete ? typeCheckerForAutocomplete : typeChecker, + if (!FFlag::DebugLuauDeferredConstraintResolution && !FFlag::LuauOnDemandTypecheckers) + return Luau::loadDefinitionFileNoDCR(typeCheckForAutocomplete ? typeCheckerForAutocomplete_DEPRECATED : typeChecker_DEPRECATED, typeCheckForAutocomplete ? 
globalsForAutocomplete : globals, targetScope, source, packageName, captureComments); LUAU_TIMETRACE_SCOPE("loadDefinitionFile", "Frontend"); @@ -142,7 +143,7 @@ LoadDefinitionFileResult Frontend::loadDefinitionFile(GlobalTypes& globals, Scop if (parseResult.errors.size() > 0) return LoadDefinitionFileResult{false, parseResult, sourceModule, nullptr}; - ModulePtr checkedModule = check(sourceModule, Mode::Definition, {}); + ModulePtr checkedModule = check(sourceModule, Mode::Definition, {}, std::nullopt, /*forAutocomplete*/ false, /*recordJsonLog*/ false, {}); if (checkedModule->errors.size() > 0) return LoadDefinitionFileResult{false, parseResult, sourceModule, checkedModule}; @@ -155,6 +156,7 @@ LoadDefinitionFileResult Frontend::loadDefinitionFile(GlobalTypes& globals, Scop LoadDefinitionFileResult loadDefinitionFileNoDCR(TypeChecker& typeChecker, GlobalTypes& globals, ScopePtr targetScope, std::string_view source, const std::string& packageName, bool captureComments) { + LUAU_ASSERT(!FFlag::LuauOnDemandTypecheckers); LUAU_TIMETRACE_SCOPE("loadDefinitionFile", "Frontend"); Luau::SourceModule sourceModule; @@ -406,8 +408,8 @@ Frontend::Frontend(FileResolver* fileResolver, ConfigResolver* configResolver, c , moduleResolverForAutocomplete(this) , globals(builtinTypes) , globalsForAutocomplete(builtinTypes) - , typeChecker(globals.globalScope, &moduleResolver, builtinTypes, &iceHandler) - , typeCheckerForAutocomplete(globalsForAutocomplete.globalScope, &moduleResolverForAutocomplete, builtinTypes, &iceHandler) + , typeChecker_DEPRECATED(globals.globalScope, &moduleResolver, builtinTypes, &iceHandler) + , typeCheckerForAutocomplete_DEPRECATED(globalsForAutocomplete.globalScope, &moduleResolverForAutocomplete, builtinTypes, &iceHandler) , configResolver(configResolver) , options(options) { @@ -491,35 +493,68 @@ CheckResult Frontend::check(const ModuleName& name, std::optional 0) - typeCheckerForAutocomplete.instantiationChildLimit = std::max(1, 
int(FInt::LuauTarjanChildLimit * sourceNode.autocompleteLimitsMult)); - else - typeCheckerForAutocomplete.instantiationChildLimit = std::nullopt; + if (autocompleteTimeLimit != 0.0) + typeCheckerForAutocomplete_DEPRECATED.finishTime = TimeTrace::getClock() + autocompleteTimeLimit; + else + typeCheckerForAutocomplete_DEPRECATED.finishTime = std::nullopt; - if (FInt::LuauTypeInferIterationLimit > 0) - typeCheckerForAutocomplete.unifierIterationLimit = - std::max(1, int(FInt::LuauTypeInferIterationLimit * sourceNode.autocompleteLimitsMult)); - else - typeCheckerForAutocomplete.unifierIterationLimit = std::nullopt; + // TODO: This is a dirty ad hoc solution for autocomplete timeouts + // We are trying to dynamically adjust our existing limits to lower total typechecking time under the limit + // so that we'll have type information for the whole file at lower quality instead of a full abort in the middle + if (FInt::LuauTarjanChildLimit > 0) + typeCheckerForAutocomplete_DEPRECATED.instantiationChildLimit = + std::max(1, int(FInt::LuauTarjanChildLimit * sourceNode.autocompleteLimitsMult)); + else + typeCheckerForAutocomplete_DEPRECATED.instantiationChildLimit = std::nullopt; - ModulePtr moduleForAutocomplete = - FFlag::DebugLuauDeferredConstraintResolution - ? check(sourceModule, Mode::Strict, requireCycles, /*forAutocomplete*/ true, /*recordJsonLog*/ false) - : typeCheckerForAutocomplete.check(sourceModule, Mode::Strict, environmentScope); + if (FInt::LuauTypeInferIterationLimit > 0) + typeCheckerForAutocomplete_DEPRECATED.unifierIterationLimit = + std::max(1, int(FInt::LuauTypeInferIterationLimit * sourceNode.autocompleteLimitsMult)); + else + typeCheckerForAutocomplete_DEPRECATED.unifierIterationLimit = std::nullopt; + + moduleForAutocomplete = + FFlag::DebugLuauDeferredConstraintResolution + ? 
check(sourceModule, Mode::Strict, requireCycles, environmentScope, /*forAutocomplete*/ true, /*recordJsonLog*/ false, {}) + : typeCheckerForAutocomplete_DEPRECATED.check(sourceModule, Mode::Strict, environmentScope); + } + else + { + // The autocomplete typecheck is always in strict mode with DM awareness + // to provide better type information for IDE features + TypeCheckLimits typeCheckLimits; + + if (autocompleteTimeLimit != 0.0) + typeCheckLimits.finishTime = TimeTrace::getClock() + autocompleteTimeLimit; + else + typeCheckLimits.finishTime = std::nullopt; + + // TODO: This is a dirty ad hoc solution for autocomplete timeouts + // We are trying to dynamically adjust our existing limits to lower total typechecking time under the limit + // so that we'll have type information for the whole file at lower quality instead of a full abort in the middle + if (FInt::LuauTarjanChildLimit > 0) + typeCheckLimits.instantiationChildLimit = std::max(1, int(FInt::LuauTarjanChildLimit * sourceNode.autocompleteLimitsMult)); + else + typeCheckLimits.instantiationChildLimit = std::nullopt; + + if (FInt::LuauTypeInferIterationLimit > 0) + typeCheckLimits.unifierIterationLimit = std::max(1, int(FInt::LuauTypeInferIterationLimit * sourceNode.autocompleteLimitsMult)); + else + typeCheckLimits.unifierIterationLimit = std::nullopt; + + moduleForAutocomplete = check(sourceModule, Mode::Strict, requireCycles, environmentScope, /*forAutocomplete*/ true, + /*recordJsonLog*/ false, typeCheckLimits); + } moduleResolverForAutocomplete.modules[moduleName] = moduleForAutocomplete; @@ -543,13 +578,22 @@ CheckResult Frontend::check(const ModuleName& name, std::optionalget(global.c_str()); if (name.value) - result->bindings[name].typeId = typeChecker.anyType; + result->bindings[name].typeId = FFlag::LuauOnDemandTypecheckers ? 
builtinTypes->anyType : typeChecker_DEPRECATED.anyType; } } @@ -829,15 +873,15 @@ const SourceModule* Frontend::getSourceModule(const ModuleName& moduleName) cons ModulePtr check(const SourceModule& sourceModule, const std::vector& requireCycles, NotNull builtinTypes, NotNull iceHandler, NotNull moduleResolver, NotNull fileResolver, - const ScopePtr& globalScope, FrontendOptions options) + const ScopePtr& parentScope, FrontendOptions options) { const bool recordJsonLog = FFlag::DebugLuauLogSolverToJson; - return check(sourceModule, requireCycles, builtinTypes, iceHandler, moduleResolver, fileResolver, globalScope, options, recordJsonLog); + return check(sourceModule, requireCycles, builtinTypes, iceHandler, moduleResolver, fileResolver, parentScope, options, recordJsonLog); } ModulePtr check(const SourceModule& sourceModule, const std::vector& requireCycles, NotNull builtinTypes, NotNull iceHandler, NotNull moduleResolver, NotNull fileResolver, - const ScopePtr& globalScope, FrontendOptions options, bool recordJsonLog) + const ScopePtr& parentScope, FrontendOptions options, bool recordJsonLog) { ModulePtr result = std::make_shared(); result->reduction = std::make_unique(NotNull{&result->internalTypes}, builtinTypes, iceHandler); @@ -868,7 +912,7 @@ ModulePtr check(const SourceModule& sourceModule, const std::vector requireCycles, bool forAutocomplete, bool recordJsonLog) +ModulePtr Frontend::check(const SourceModule& sourceModule, Mode mode, std::vector requireCycles, + std::optional environmentScope, bool forAutocomplete, bool recordJsonLog, TypeCheckLimits typeCheckLimits) { - return Luau::check(sourceModule, requireCycles, builtinTypes, NotNull{&iceHandler}, - NotNull{forAutocomplete ? &moduleResolverForAutocomplete : &moduleResolver}, NotNull{fileResolver}, - forAutocomplete ? 
globalsForAutocomplete.globalScope : globals.globalScope, options, recordJsonLog); + if (FFlag::DebugLuauDeferredConstraintResolution && mode == Mode::Strict) + { + return Luau::check(sourceModule, requireCycles, builtinTypes, NotNull{&iceHandler}, + NotNull{forAutocomplete ? &moduleResolverForAutocomplete : &moduleResolver}, NotNull{fileResolver}, + environmentScope ? *environmentScope : globals.globalScope, options, recordJsonLog); + } + else + { + LUAU_ASSERT(FFlag::LuauOnDemandTypecheckers); + + TypeChecker typeChecker(globals.globalScope, forAutocomplete ? &moduleResolverForAutocomplete : &moduleResolver, builtinTypes, &iceHandler); + + if (prepareModuleScope) + { + typeChecker.prepareModuleScope = [this, forAutocomplete](const ModuleName& name, const ScopePtr& scope) { + prepareModuleScope(name, scope, forAutocomplete); + }; + } + + typeChecker.requireCycles = requireCycles; + typeChecker.finishTime = typeCheckLimits.finishTime; + typeChecker.instantiationChildLimit = typeCheckLimits.instantiationChildLimit; + typeChecker.unifierIterationLimit = typeCheckLimits.unifierIterationLimit; + + return typeChecker.check(sourceModule, mode, environmentScope); + } } // Read AST into sourceModules if necessary. Trace require()s. Report parse errors. 
diff --git a/Analysis/src/Module.cpp b/Analysis/src/Module.cpp index fd948403..830aaf75 100644 --- a/Analysis/src/Module.cpp +++ b/Analysis/src/Module.cpp @@ -20,6 +20,7 @@ LUAU_FASTFLAGVARIABLE(LuauClonePublicInterfaceLess2, false); LUAU_FASTFLAG(LuauSubstitutionReentrant); LUAU_FASTFLAG(LuauClassTypeVarsInSubstitution); LUAU_FASTFLAG(LuauSubstitutionFixMissingFields); +LUAU_FASTFLAGVARIABLE(LuauCopyExportedTypes, false); namespace Luau { @@ -37,14 +38,14 @@ static bool contains(Position pos, Comment comment) return false; } -bool isWithinComment(const SourceModule& sourceModule, Position pos) +static bool isWithinComment(const std::vector& commentLocations, Position pos) { - auto iter = std::lower_bound(sourceModule.commentLocations.begin(), sourceModule.commentLocations.end(), - Comment{Lexeme::Comment, Location{pos, pos}}, [](const Comment& a, const Comment& b) { + auto iter = std::lower_bound( + commentLocations.begin(), commentLocations.end(), Comment{Lexeme::Comment, Location{pos, pos}}, [](const Comment& a, const Comment& b) { return a.location.end < b.location.end; }); - if (iter == sourceModule.commentLocations.end()) + if (iter == commentLocations.end()) return false; if (contains(pos, *iter)) @@ -53,12 +54,22 @@ bool isWithinComment(const SourceModule& sourceModule, Position pos) // Due to the nature of std::lower_bound, it is possible that iter points at a comment that ends // at pos. We'll try the next comment, if it exists. 
++iter; - if (iter == sourceModule.commentLocations.end()) + if (iter == commentLocations.end()) return false; return contains(pos, *iter); } +bool isWithinComment(const SourceModule& sourceModule, Position pos) +{ + return isWithinComment(sourceModule.commentLocations, pos); +} + +bool isWithinComment(const ParseResult& result, Position pos) +{ + return isWithinComment(result.commentLocations, pos); +} + struct ClonePublicInterface : Substitution { NotNull builtinTypes; @@ -227,7 +238,7 @@ void Module::clonePublicInterface(NotNull builtinTypes, InternalEr // Copy external stuff over to Module itself this->returnType = moduleScope->returnType; - if (FFlag::DebugLuauDeferredConstraintResolution) + if (FFlag::DebugLuauDeferredConstraintResolution || FFlag::LuauCopyExportedTypes) this->exportedTypeBindings = moduleScope->exportedTypeBindings; else this->exportedTypeBindings = std::move(moduleScope->exportedTypeBindings); diff --git a/Analysis/src/Type.cpp b/Analysis/src/Type.cpp index d70f17f5..52854108 100644 --- a/Analysis/src/Type.cpp +++ b/Analysis/src/Type.cpp @@ -337,7 +337,16 @@ bool isSubset(const UnionType& super, const UnionType& sub) return true; } +bool hasPrimitiveTypeInIntersection(TypeId ty, PrimitiveType::Type primTy) +{ + TypeId tf = follow(ty); + if (isPrim(tf, primTy)) + return true; + for (auto t : flattenIntersection(tf)) + return isPrim(follow(t), primTy); + return false; +} // When typechecking an assignment `x = e`, we typecheck `x:T` and `e:U`, // then instantiate U if `isGeneric(U)` is true, and `maybeGeneric(T)` is false. 
bool isGeneric(TypeId ty) diff --git a/Analysis/src/TypeChecker2.cpp b/Analysis/src/TypeChecker2.cpp index c7d30f43..6e76af04 100644 --- a/Analysis/src/TypeChecker2.cpp +++ b/Analysis/src/TypeChecker2.cpp @@ -1160,11 +1160,7 @@ struct TypeChecker2 visit(expr, RValue); TypeId leftType = stripFromNilAndReport(lookupType(expr), location); - const NormalizedType* norm = normalizer.normalize(leftType); - if (!norm) - reportError(NormalizationTooComplex{}, location); - - checkIndexTypeFromType(leftType, *norm, propName, location, context); + checkIndexTypeFromType(leftType, propName, location, context); } void visit(AstExprIndexName* indexName, ValueContext context) @@ -2033,8 +2029,16 @@ struct TypeChecker2 reportError(std::move(e)); } - void checkIndexTypeFromType(TypeId tableTy, const NormalizedType& norm, const std::string& prop, const Location& location, ValueContext context) + // If the provided type does not have the named property, report an error. + void checkIndexTypeFromType(TypeId tableTy, const std::string& prop, const Location& location, ValueContext context) { + const NormalizedType* norm = normalizer.normalize(tableTy); + if (!norm) + { + reportError(NormalizationTooComplex{}, location); + return; + } + bool foundOneProp = false; std::vector typesMissingTheProp; @@ -2042,49 +2046,50 @@ struct TypeChecker2 if (!normalizer.isInhabited(ty)) return; - bool found = hasIndexTypeFromType(ty, prop, location); + std::unordered_set seen; + bool found = hasIndexTypeFromType(ty, prop, location, seen); foundOneProp |= found; if (!found) typesMissingTheProp.push_back(ty); }; - fetch(norm.tops); - fetch(norm.booleans); + fetch(norm->tops); + fetch(norm->booleans); if (FFlag::LuauNegatedClassTypes) { - for (const auto& [ty, _negations] : norm.classes.classes) + for (const auto& [ty, _negations] : norm->classes.classes) { fetch(ty); } } else { - for (TypeId ty : norm.DEPRECATED_classes) + for (TypeId ty : norm->DEPRECATED_classes) fetch(ty); } - fetch(norm.errors); - 
fetch(norm.nils); - fetch(norm.numbers); - if (!norm.strings.isNever()) + fetch(norm->errors); + fetch(norm->nils); + fetch(norm->numbers); + if (!norm->strings.isNever()) fetch(builtinTypes->stringType); - fetch(norm.threads); - for (TypeId ty : norm.tables) + fetch(norm->threads); + for (TypeId ty : norm->tables) fetch(ty); - if (norm.functions.isTop) + if (norm->functions.isTop) fetch(builtinTypes->functionType); - else if (!norm.functions.isNever()) + else if (!norm->functions.isNever()) { - if (norm.functions.parts.size() == 1) - fetch(norm.functions.parts.front()); + if (norm->functions.parts.size() == 1) + fetch(norm->functions.parts.front()); else { std::vector parts; - parts.insert(parts.end(), norm.functions.parts.begin(), norm.functions.parts.end()); + parts.insert(parts.end(), norm->functions.parts.begin(), norm->functions.parts.end()); fetch(testArena.addType(IntersectionType{std::move(parts)})); } } - for (const auto& [tyvar, intersect] : norm.tyvars) + for (const auto& [tyvar, intersect] : norm->tyvars) { if (get(intersect->tops)) { @@ -2110,8 +2115,15 @@ struct TypeChecker2 } } - bool hasIndexTypeFromType(TypeId ty, const std::string& prop, const Location& location) + bool hasIndexTypeFromType(TypeId ty, const std::string& prop, const Location& location, std::unordered_set& seen) { + // If we have already encountered this type, we must assume that some + // other codepath will do the right thing and signal false if the + // property is not present. 
+ const bool isUnseen = seen.insert(ty).second; + if (!isUnseen) + return true; + if (get(ty) || get(ty) || get(ty)) return true; @@ -2136,10 +2148,12 @@ struct TypeChecker2 else if (const ClassType* cls = get(ty)) return bool(lookupClassProp(cls, prop)); else if (const UnionType* utv = get(ty)) - ice.ice("getIndexTypeFromTypeHelper cannot take a UnionType"); + return std::all_of(begin(utv), end(utv), [&](TypeId part) { + return hasIndexTypeFromType(part, prop, location, seen); + }); else if (const IntersectionType* itv = get(ty)) return std::any_of(begin(itv), end(itv), [&](TypeId part) { - return hasIndexTypeFromType(part, prop, location); + return hasIndexTypeFromType(part, prop, location, seen); }); else return false; diff --git a/Analysis/src/TypeInfer.cpp b/Analysis/src/TypeInfer.cpp index acf70fec..7f366a20 100644 --- a/Analysis/src/TypeInfer.cpp +++ b/Analysis/src/TypeInfer.cpp @@ -35,14 +35,13 @@ LUAU_FASTFLAG(LuauKnowsTheDataModel3) LUAU_FASTFLAGVARIABLE(DebugLuauFreezeDuringUnification, false) LUAU_FASTFLAGVARIABLE(LuauReturnAnyInsteadOfICE, false) // Eventually removed as false. 
LUAU_FASTFLAGVARIABLE(DebugLuauSharedSelf, false) -LUAU_FASTFLAGVARIABLE(LuauTryhardAnd, false) LUAU_FASTFLAG(LuauInstantiateInSubtyping) LUAU_FASTFLAG(LuauNegatedClassTypes) LUAU_FASTFLAGVARIABLE(LuauAllowIndexClassParameters, false) LUAU_FASTFLAG(LuauUninhabitedSubAnything2) +LUAU_FASTFLAG(LuauOccursIsntAlwaysFailure) LUAU_FASTFLAGVARIABLE(LuauTypecheckTypeguards, false) LUAU_FASTFLAGVARIABLE(LuauTinyControlFlowAnalysis, false) -LUAU_FASTFLAGVARIABLE(LuauReducingAndOr, false) namespace Luau { @@ -1623,9 +1622,28 @@ ControlFlow TypeChecker::check(const ScopePtr& scope, const AstStatTypeAlias& ty TypeId& bindingType = bindingsMap[name].type; - if (unify(ty, bindingType, aliasScope, typealias.location)) - bindingType = ty; + if (!FFlag::LuauOccursIsntAlwaysFailure) + { + if (unify(ty, bindingType, aliasScope, typealias.location)) + bindingType = ty; + return ControlFlow::None; + } + unify(ty, bindingType, aliasScope, typealias.location); + + // It is possible for this unification to succeed but for + // `bindingType` still to be free. For example, in + // `type T = T|T`, we generate a fresh free type `X`, and then + // unify `X` with `X|X`, which succeeds without binding `X` to + // anything, since `X <: X|X` + if (bindingType->ty.get_if()) + { + ty = errorRecoveryType(aliasScope); + unify(ty, bindingType, aliasScope, typealias.location); + reportError(TypeError{typealias.location, OccursCheckFailed{}}); + } + + bindingType = ty; return ControlFlow::None; } @@ -2848,7 +2866,7 @@ TypeId TypeChecker::checkRelationalOperation( { return lhsType; } - else if (FFlag::LuauTryhardAnd) + else { // If lhs is free, we can't tell which 'falsy' components it has, if any if (get(lhsType)) @@ -2860,14 +2878,11 @@ TypeId TypeChecker::checkRelationalOperation( { LUAU_ASSERT(oty); - if (FFlag::LuauReducingAndOr) - { - // Perform a limited form of type reduction for booleans - if (isPrim(*oty, PrimitiveType::Boolean) && get(get(follow(rhsType)))) - return booleanType; - if 
(isPrim(rhsType, PrimitiveType::Boolean) && get(get(follow(*oty)))) - return booleanType; - } + // Perform a limited form of type reduction for booleans + if (isPrim(*oty, PrimitiveType::Boolean) && get(get(follow(rhsType)))) + return booleanType; + if (isPrim(rhsType, PrimitiveType::Boolean) && get(get(follow(*oty)))) + return booleanType; return unionOfTypes(*oty, rhsType, scope, expr.location, false); } @@ -2876,16 +2891,12 @@ TypeId TypeChecker::checkRelationalOperation( return rhsType; } } - else - { - return unionOfTypes(rhsType, booleanType, scope, expr.location, false); - } case AstExprBinary::Or: if (lhsIsAny) { return lhsType; } - else if (FFlag::LuauTryhardAnd) + else { auto [oty, notNever] = pickTypesFromSense(lhsType, true, neverType); // Filter out truthy types @@ -2893,14 +2904,11 @@ TypeId TypeChecker::checkRelationalOperation( { LUAU_ASSERT(oty); - if (FFlag::LuauReducingAndOr) - { - // Perform a limited form of type reduction for booleans - if (isPrim(*oty, PrimitiveType::Boolean) && get(get(follow(rhsType)))) - return booleanType; - if (isPrim(rhsType, PrimitiveType::Boolean) && get(get(follow(*oty)))) - return booleanType; - } + // Perform a limited form of type reduction for booleans + if (isPrim(*oty, PrimitiveType::Boolean) && get(get(follow(rhsType)))) + return booleanType; + if (isPrim(rhsType, PrimitiveType::Boolean) && get(get(follow(*oty)))) + return booleanType; return unionOfTypes(*oty, rhsType, scope, expr.location); } @@ -2909,10 +2917,6 @@ TypeId TypeChecker::checkRelationalOperation( return rhsType; } } - else - { - return unionOfTypes(lhsType, rhsType, scope, expr.location); - } default: LUAU_ASSERT(0); ice(format("checkRelationalOperation called with incorrect binary expression '%s'", toString(expr.op).c_str()), expr.location); diff --git a/Analysis/src/Unifier.cpp b/Analysis/src/Unifier.cpp index 642aa399..3f4e34f6 100644 --- a/Analysis/src/Unifier.cpp +++ b/Analysis/src/Unifier.cpp @@ -19,8 +19,10 @@ 
LUAU_FASTINT(LuauTypeInferTypePackLoopLimit) LUAU_FASTFLAG(LuauErrorRecoveryType) LUAU_FASTFLAGVARIABLE(LuauInstantiateInSubtyping, false) LUAU_FASTFLAGVARIABLE(LuauUninhabitedSubAnything2, false) +LUAU_FASTFLAGVARIABLE(LuauVariadicAnyCanBeGeneric, false) LUAU_FASTFLAGVARIABLE(LuauMaintainScopesInUnifier, false) LUAU_FASTFLAGVARIABLE(LuauTransitiveSubtyping, false) +LUAU_FASTFLAGVARIABLE(LuauOccursIsntAlwaysFailure, false) LUAU_FASTFLAG(LuauClassTypeVarsInSubstitution) LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution) LUAU_FASTFLAG(LuauNormalizeBlockedTypes) @@ -431,14 +433,14 @@ void Unifier::tryUnify_(TypeId subTy, TypeId superTy, bool isFunctionCall, bool if (superFree && subFree && subsumes(useScopes, superFree, subFree)) { - if (!occursCheck(subTy, superTy)) + if (!occursCheck(subTy, superTy, /* reversed = */ false)) log.replace(subTy, BoundType(superTy)); return; } else if (superFree && subFree) { - if (!occursCheck(superTy, subTy)) + if (!occursCheck(superTy, subTy, /* reversed = */ true)) { if (subsumes(useScopes, superFree, subFree)) { @@ -461,7 +463,7 @@ void Unifier::tryUnify_(TypeId subTy, TypeId superTy, bool isFunctionCall, bool return; } - if (!occursCheck(superTy, subTy)) + if (!occursCheck(superTy, subTy, /* reversed = */ true)) { promoteTypeLevels(log, types, superFree->level, superFree->scope, useScopes, subTy); @@ -487,7 +489,7 @@ void Unifier::tryUnify_(TypeId subTy, TypeId superTy, bool isFunctionCall, bool return; } - if (!occursCheck(subTy, superTy)) + if (!occursCheck(subTy, superTy, /* reversed = */ false)) { promoteTypeLevels(log, types, subFree->level, subFree->scope, useScopes, superTy); log.replace(subTy, BoundType(superTy)); @@ -1593,7 +1595,7 @@ void Unifier::tryUnify_(TypePackId subTp, TypePackId superTp, bool isFunctionCal if (log.getMutable(superTp)) { - if (!occursCheck(superTp, subTp)) + if (!occursCheck(superTp, subTp, /* reversed = */ true)) { Widen widen{types, builtinTypes}; log.replace(superTp, 
Unifiable::Bound(widen(subTp))); @@ -1601,7 +1603,7 @@ void Unifier::tryUnify_(TypePackId subTp, TypePackId superTp, bool isFunctionCal } else if (log.getMutable(subTp)) { - if (!occursCheck(subTp, superTp)) + if (!occursCheck(subTp, superTp, /* reversed = */ false)) { log.replace(subTp, Unifiable::Bound(superTp)); } @@ -2585,13 +2587,14 @@ static void queueTypePack(std::vector& queue, DenseHashSet& void Unifier::tryUnifyVariadics(TypePackId subTp, TypePackId superTp, bool reversed, int subOffset) { const VariadicTypePack* superVariadic = log.getMutable(superTp); + const TypeId variadicTy = follow(superVariadic->ty); if (!superVariadic) ice("passed non-variadic pack to tryUnifyVariadics"); if (const VariadicTypePack* subVariadic = log.get(subTp)) { - tryUnify_(reversed ? superVariadic->ty : subVariadic->ty, reversed ? subVariadic->ty : superVariadic->ty); + tryUnify_(reversed ? variadicTy : subVariadic->ty, reversed ? subVariadic->ty : variadicTy); } else if (log.get(subTp)) { @@ -2602,7 +2605,7 @@ void Unifier::tryUnifyVariadics(TypePackId subTp, TypePackId superTp, bool rever while (subIter != subEnd) { - tryUnify_(reversed ? superVariadic->ty : *subIter, reversed ? *subIter : superVariadic->ty); + tryUnify_(reversed ? variadicTy : *subIter, reversed ? *subIter : variadicTy); ++subIter; } @@ -2615,7 +2618,7 @@ void Unifier::tryUnifyVariadics(TypePackId subTp, TypePackId superTp, bool rever } else if (const VariadicTypePack* vtp = get(tail)) { - tryUnify_(vtp->ty, superVariadic->ty); + tryUnify_(vtp->ty, variadicTy); } else if (get(tail)) { @@ -2631,6 +2634,10 @@ void Unifier::tryUnifyVariadics(TypePackId subTp, TypePackId superTp, bool rever } } } + else if (FFlag::LuauVariadicAnyCanBeGeneric && get(variadicTy) && log.get(subTp)) + { + // Nothing to do. This is ok. 
+ } else { reportError(location, GenericError{"Failed to unify variadic packs"}); @@ -2751,11 +2758,42 @@ TxnLog Unifier::combineLogsIntoUnion(std::vector logs) return result; } -bool Unifier::occursCheck(TypeId needle, TypeId haystack) +bool Unifier::occursCheck(TypeId needle, TypeId haystack, bool reversed) { sharedState.tempSeenTy.clear(); - return occursCheck(sharedState.tempSeenTy, needle, haystack); + bool occurs = occursCheck(sharedState.tempSeenTy, needle, haystack); + + if (occurs && FFlag::LuauOccursIsntAlwaysFailure) + { + Unifier innerState = makeChildUnifier(); + if (const UnionType* ut = get(haystack)) + { + if (reversed) + innerState.tryUnifyUnionWithType(haystack, ut, needle); + else + innerState.tryUnifyTypeWithUnion(needle, haystack, ut, /* cacheEnabled = */ false, /* isFunction = */ false); + } + else if (const IntersectionType* it = get(haystack)) + { + if (reversed) + innerState.tryUnifyIntersectionWithType(haystack, it, needle, /* cacheEnabled = */ false, /* isFunction = */ false); + else + innerState.tryUnifyTypeWithIntersection(needle, haystack, it); + } + else + { + innerState.failure = true; + } + + if (innerState.failure) + { + reportError(location, OccursCheckFailed{}); + log.replace(needle, *builtinTypes->errorRecoveryType()); + } + } + + return occurs; } bool Unifier::occursCheck(DenseHashSet& seen, TypeId needle, TypeId haystack) @@ -2785,8 +2823,11 @@ bool Unifier::occursCheck(DenseHashSet& seen, TypeId needle, TypeId hays if (needle == haystack) { - reportError(location, OccursCheckFailed{}); - log.replace(needle, *builtinTypes->errorRecoveryType()); + if (!FFlag::LuauOccursIsntAlwaysFailure) + { + reportError(location, OccursCheckFailed{}); + log.replace(needle, *builtinTypes->errorRecoveryType()); + } return true; } @@ -2807,11 +2848,19 @@ bool Unifier::occursCheck(DenseHashSet& seen, TypeId needle, TypeId hays return occurrence; } -bool Unifier::occursCheck(TypePackId needle, TypePackId haystack) +bool 
Unifier::occursCheck(TypePackId needle, TypePackId haystack, bool reversed) { sharedState.tempSeenTp.clear(); - return occursCheck(sharedState.tempSeenTp, needle, haystack); + bool occurs = occursCheck(sharedState.tempSeenTp, needle, haystack); + + if (occurs && FFlag::LuauOccursIsntAlwaysFailure) + { + reportError(location, OccursCheckFailed{}); + log.replace(needle, *builtinTypes->errorRecoveryTypePack()); + } + + return occurs; } bool Unifier::occursCheck(DenseHashSet& seen, TypePackId needle, TypePackId haystack) @@ -2836,8 +2885,11 @@ bool Unifier::occursCheck(DenseHashSet& seen, TypePackId needle, Typ { if (needle == haystack) { - reportError(location, OccursCheckFailed{}); - log.replace(needle, *builtinTypes->errorRecoveryTypePack()); + if (!FFlag::LuauOccursIsntAlwaysFailure) + { + reportError(location, OccursCheckFailed{}); + log.replace(needle, *builtinTypes->errorRecoveryTypePack()); + } return true; } diff --git a/Ast/src/StringUtils.cpp b/Ast/src/StringUtils.cpp index 11e0076a..343c553c 100644 --- a/Ast/src/StringUtils.cpp +++ b/Ast/src/StringUtils.cpp @@ -167,7 +167,9 @@ size_t editDistance(std::string_view a, std::string_view b) for (size_t y = 1; y <= b.size(); ++y) { - size_t x1 = seenCharToRow[b[y - 1]]; + // The value of b[N] can be negative with unicode characters + unsigned char bSeenCharIndex = static_cast(b[y - 1]); + size_t x1 = seenCharToRow[bSeenCharIndex]; size_t y1 = lastMatchedY; size_t cost = 1; @@ -187,7 +189,9 @@ size_t editDistance(std::string_view a, std::string_view b) distances[getPos(x + 1, y + 1)] = std::min(std::min(insertion, deletion), std::min(substitution, transposition)); } - seenCharToRow[a[x - 1]] = x; + // The value of a[N] can be negative with unicode characters + unsigned char aSeenCharIndex = static_cast(a[x - 1]); + seenCharToRow[aSeenCharIndex] = x; } return distances[getPos(a.size() + 1, b.size() + 1)]; diff --git a/CodeGen/include/Luau/AddressA64.h b/CodeGen/include/Luau/AddressA64.h index 2796ef70..acb64e39 
100644 --- a/CodeGen/include/Luau/AddressA64.h +++ b/CodeGen/include/Luau/AddressA64.h @@ -29,7 +29,7 @@ struct AddressA64 // For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB static constexpr size_t kMaxOffset = 1023; - AddressA64(RegisterA64 base, int off = 0) + constexpr AddressA64(RegisterA64 base, int off = 0) : kind(AddressKindA64::imm) , base(base) , offset(xzr) @@ -38,7 +38,7 @@ struct AddressA64 LUAU_ASSERT(base.kind == KindA64::x || base == sp); } - AddressA64(RegisterA64 base, RegisterA64 offset) + constexpr AddressA64(RegisterA64 base, RegisterA64 offset) : kind(AddressKindA64::reg) , base(base) , offset(offset) diff --git a/CodeGen/include/Luau/AssemblyBuilderA64.h b/CodeGen/include/Luau/AssemblyBuilderA64.h index def4d0c0..42f5f8a6 100644 --- a/CodeGen/include/Luau/AssemblyBuilderA64.h +++ b/CodeGen/include/Luau/AssemblyBuilderA64.h @@ -49,17 +49,25 @@ public: void cmp(RegisterA64 src1, RegisterA64 src2); void cmp(RegisterA64 src1, uint16_t src2); void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond); + void cset(RegisterA64 dst, ConditionA64 cond); // Bitwise - // TODO: support immediate arguments (they have odd encoding and forbid many values) - // TODO: support bic (andnot) // TODO: support shifts // TODO: support bitfield ops void and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); void orr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); void eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + void bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + void tst(RegisterA64 src1, RegisterA64 src2); void mvn(RegisterA64 dst, RegisterA64 src); + // Bitwise with immediate + // Note: immediate must have a single contiguous sequence of 1 bits set of length 1..31 + void and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2); + void orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2); + void eor(RegisterA64 dst, 
RegisterA64 src1, uint32_t src2); + void tst(RegisterA64 src1, uint32_t src2); + // Shifts void lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); void lsr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); @@ -168,7 +176,7 @@ public: private: // Instruction archetypes void place0(const char* name, uint32_t word); - void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0); + void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0, int N = 0); void placeSR2(const char* name, RegisterA64 dst, RegisterA64 src, uint8_t op, uint8_t op2 = 0); void placeR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t op2); void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op); @@ -181,8 +189,9 @@ private: void placeADR(const char* name, RegisterA64 src, uint8_t op); void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label); void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog); - void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc); + void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert = 0); void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc); + void placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op); void place(uint32_t word); diff --git a/CodeGen/include/Luau/IrCallWrapperX64.h b/CodeGen/include/Luau/IrCallWrapperX64.h index 724d4624..c403d189 100644 --- a/CodeGen/include/Luau/IrCallWrapperX64.h +++ b/CodeGen/include/Luau/IrCallWrapperX64.h @@ -41,12 +41,14 @@ public: void call(const OperandX64& func); + RegisterX64 suggestNextArgumentRegister(SizeX64 size) const; + IrRegAllocX64& 
regs; AssemblyBuilderX64& build; uint32_t instIdx = ~0u; private: - void assignTargetRegisters(); + OperandX64 getNextArgumentTarget(SizeX64 size) const; void countRegisterUses(); CallArgument* findNonInterferingArgument(); bool interferesWithOperand(const OperandX64& op, RegisterX64 reg) const; @@ -67,6 +69,9 @@ private: std::array args; int argCount = 0; + int gprPos = 0; + int xmmPos = 0; + OperandX64 funcOp; // Internal counters for remaining register use counts diff --git a/CodeGen/include/Luau/IrData.h b/CodeGen/include/Luau/IrData.h index fcf29adb..486a0135 100644 --- a/CodeGen/include/Luau/IrData.h +++ b/CodeGen/include/Luau/IrData.h @@ -155,7 +155,7 @@ enum class IrCmd : uint8_t // Compute Luau 'not' operation on destructured TValue // A: tag - // B: double + // B: int (value) NOT_ANY, // TODO: boolean specialization will be useful // Unconditional jump @@ -233,7 +233,7 @@ enum class IrCmd : uint8_t // Try to get pointer to tag method TValue inside the table's metatable or jump if there is no such value or metatable // A: table - // B: int + // B: int (TMS enum) // C: block TRY_CALL_FASTGETTM, @@ -256,8 +256,8 @@ enum class IrCmd : uint8_t // B: Rn (result start) // C: Rn (argument start) // D: Rn or Kn or a boolean that's false (optional second argument) - // E: int (argument count or -1 to use all arguments up to stack top) - // F: int (result count or -1 to preserve all results and adjust stack top) + // E: int (argument count) + // F: int (result count) FASTCALL, // Call the fastcall builtin function @@ -517,8 +517,10 @@ enum class IrCmd : uint8_t FALLBACK_FORGPREP, // Instruction that passes value through, it is produced by constant folding and users substitute it with the value + // When operand location is set, updates the tracked location of the value in memory SUBSTITUTE, // A: operand of any type + // B: Rn/Kn/none (location of operand in memory; optional) }; enum class IrConstKind : uint8_t @@ -694,6 +696,9 @@ struct IrFunction std::vector 
bcMapping; + // For each instruction, an operand that can be used to recompute the value + std::vector valueRestoreOps; + Proto* proto = nullptr; CfgInfo cfg; @@ -829,19 +834,40 @@ struct IrFunction return value.valueDouble; } - uint32_t getBlockIndex(const IrBlock& block) + uint32_t getBlockIndex(const IrBlock& block) const { // Can only be called with blocks from our vector LUAU_ASSERT(&block >= blocks.data() && &block <= blocks.data() + blocks.size()); return uint32_t(&block - blocks.data()); } - uint32_t getInstIndex(const IrInst& inst) + uint32_t getInstIndex(const IrInst& inst) const { // Can only be called with instructions from our vector LUAU_ASSERT(&inst >= instructions.data() && &inst <= instructions.data() + instructions.size()); return uint32_t(&inst - instructions.data()); } + + void recordRestoreOp(uint32_t instIdx, IrOp location) + { + if (instIdx >= valueRestoreOps.size()) + valueRestoreOps.resize(instIdx + 1); + + valueRestoreOps[instIdx] = location; + } + + IrOp findRestoreOp(uint32_t instIdx) const + { + if (instIdx >= valueRestoreOps.size()) + return {}; + + return valueRestoreOps[instIdx]; + } + + IrOp findRestoreOp(const IrInst& inst) const + { + return findRestoreOp(getInstIndex(inst)); + } }; inline IrCondition conditionOp(IrOp op) diff --git a/CodeGen/include/Luau/IrRegAllocX64.h b/CodeGen/include/Luau/IrRegAllocX64.h index dc7b48c6..f83cc220 100644 --- a/CodeGen/include/Luau/IrRegAllocX64.h +++ b/CodeGen/include/Luau/IrRegAllocX64.h @@ -20,7 +20,9 @@ constexpr uint8_t kNoStackSlot = 0xff; struct IrSpillX64 { uint32_t instIdx = 0; - bool useDoubleSlot = 0; + IrValueKind valueKind = IrValueKind::Unknown; + + unsigned spillId = 0; // Spill location can be a stack location or be empty // When it's empty, it means that instruction value can be rematerialized @@ -33,12 +35,8 @@ struct IrRegAllocX64 { IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function); - RegisterX64 allocGprReg(SizeX64 preferredSize, uint32_t instIdx); - RegisterX64
allocXmmReg(uint32_t instIdx); - - RegisterX64 allocGprRegOrReuse(SizeX64 preferredSize, uint32_t instIdx, std::initializer_list oprefs); - RegisterX64 allocXmmRegOrReuse(uint32_t instIdx, std::initializer_list oprefs); - + RegisterX64 allocReg(SizeX64 size, uint32_t instIdx); + RegisterX64 allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list oprefs); RegisterX64 takeReg(RegisterX64 reg, uint32_t instIdx); void freeReg(RegisterX64 reg); @@ -49,6 +47,12 @@ struct IrRegAllocX64 bool shouldFreeGpr(RegisterX64 reg) const; + unsigned findSpillStackSlot(IrValueKind valueKind); + + IrOp getRestoreOp(const IrInst& inst) const; + bool hasRestoreOp(const IrInst& inst) const; + OperandX64 getRestoreAddress(const IrInst& inst, IrOp restoreOp); + // Register used by instruction is about to be freed, have to find a way to restore value later void preserve(IrInst& inst); @@ -74,6 +78,7 @@ struct IrRegAllocX64 std::bitset<256> usedSpillSlots; unsigned maxUsedSlot = 0; + unsigned nextSpillId = 1; std::vector spills; }; @@ -107,10 +112,8 @@ struct ScopedSpills ScopedSpills(const ScopedSpills&) = delete; ScopedSpills& operator=(const ScopedSpills&) = delete; - bool wasSpilledBefore(const IrSpillX64& spill) const; - IrRegAllocX64& owner; - std::vector snapshot; + unsigned startSpillId = 0; }; } // namespace X64 diff --git a/CodeGen/include/Luau/IrUtils.h b/CodeGen/include/Luau/IrUtils.h index 09c55c79..136ce3b8 100644 --- a/CodeGen/include/Luau/IrUtils.h +++ b/CodeGen/include/Luau/IrUtils.h @@ -200,7 +200,7 @@ void replace(IrFunction& function, IrOp& original, IrOp replacement); void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement); // Replace instruction with a different value (using IrCmd::SUBSTITUTE) -void substitute(IrFunction& function, IrInst& inst, IrOp replacement); +void substitute(IrFunction& function, IrInst& inst, IrOp replacement, IrOp location = {}); // Replace instruction arguments that point to substitutions with 
target values void applySubstitutions(IrFunction& function, IrOp& op); diff --git a/CodeGen/include/Luau/RegisterA64.h b/CodeGen/include/Luau/RegisterA64.h index 99e62958..c3a9ae03 100644 --- a/CodeGen/include/Luau/RegisterA64.h +++ b/CodeGen/include/Luau/RegisterA64.h @@ -46,6 +46,18 @@ constexpr RegisterA64 castReg(KindA64 kind, RegisterA64 reg) return RegisterA64{kind, reg.index}; } +// This is equivalent to castReg(KindA64::x), but is separate because it implies different semantics +// Specifically, there are cases when it's useful to treat a wN register as an xN register *after* it has been assigned a value +// Since all A64 instructions that write to wN implicitly zero the top half, this works when we need zero extension semantics +// Crucially, this is *not* safe on an ABI boundary - an int parameter in wN register may have anything in its top half in certain cases +// However, as long as our codegen doesn't use 32-bit truncation by using castReg x=>w, we can safely rely on this. 
+constexpr RegisterA64 zextReg(RegisterA64 reg) +{ + LUAU_ASSERT(reg.kind == KindA64::w); + + return RegisterA64{KindA64::x, reg.index}; +} + constexpr RegisterA64 noreg{KindA64::none, 0}; constexpr RegisterA64 w0{KindA64::w, 0}; diff --git a/CodeGen/include/Luau/RegisterX64.h b/CodeGen/include/Luau/RegisterX64.h index 9d76b116..7fa97607 100644 --- a/CodeGen/include/Luau/RegisterX64.h +++ b/CodeGen/include/Luau/RegisterX64.h @@ -46,6 +46,18 @@ constexpr RegisterX64 al{SizeX64::byte, 0}; constexpr RegisterX64 cl{SizeX64::byte, 1}; constexpr RegisterX64 dl{SizeX64::byte, 2}; constexpr RegisterX64 bl{SizeX64::byte, 3}; +constexpr RegisterX64 spl{SizeX64::byte, 4}; +constexpr RegisterX64 bpl{SizeX64::byte, 5}; +constexpr RegisterX64 sil{SizeX64::byte, 6}; +constexpr RegisterX64 dil{SizeX64::byte, 7}; +constexpr RegisterX64 r8b{SizeX64::byte, 8}; +constexpr RegisterX64 r9b{SizeX64::byte, 9}; +constexpr RegisterX64 r10b{SizeX64::byte, 10}; +constexpr RegisterX64 r11b{SizeX64::byte, 11}; +constexpr RegisterX64 r12b{SizeX64::byte, 12}; +constexpr RegisterX64 r13b{SizeX64::byte, 13}; +constexpr RegisterX64 r14b{SizeX64::byte, 14}; +constexpr RegisterX64 r15b{SizeX64::byte, 15}; constexpr RegisterX64 eax{SizeX64::dword, 0}; constexpr RegisterX64 ecx{SizeX64::dword, 1}; diff --git a/CodeGen/include/Luau/UnwindBuilder.h b/CodeGen/include/Luau/UnwindBuilder.h index 98e60498..8fe55ba6 100644 --- a/CodeGen/include/Luau/UnwindBuilder.h +++ b/CodeGen/include/Luau/UnwindBuilder.h @@ -11,6 +11,9 @@ namespace Luau namespace CodeGen { +// This value is used in 'finishFunction' to mark the function that spans to the end of the whole code block +static uint32_t kFullBlockFuncton = ~0u; + class UnwindBuilder { public: @@ -19,19 +22,22 @@ public: virtual void setBeginOffset(size_t beginOffset) = 0; virtual size_t getBeginOffset() const = 0; - virtual void start() = 0; + virtual void startInfo() = 0; + virtual void startFunction() = 0; virtual void spill(int espOffset, X64::RegisterX64 reg) 
= 0; virtual void save(X64::RegisterX64 reg) = 0; virtual void allocStack(int size) = 0; virtual void setupFrameReg(X64::RegisterX64 reg, int espOffset) = 0; + virtual void finishFunction(uint32_t beginOffset, uint32_t endOffset) = 0; - virtual void finish() = 0; + virtual void finishInfo() = 0; virtual size_t getSize() const = 0; + virtual size_t getFunctionCount() const = 0; // This will place the unwinding data at the target address and might update values of some fields - virtual void finalize(char* target, void* funcAddress, size_t funcSize) const = 0; + virtual void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const = 0; }; } // namespace CodeGen diff --git a/CodeGen/include/Luau/UnwindBuilderDwarf2.h b/CodeGen/include/Luau/UnwindBuilderDwarf2.h index 972f7423..9f862d23 100644 --- a/CodeGen/include/Luau/UnwindBuilderDwarf2.h +++ b/CodeGen/include/Luau/UnwindBuilderDwarf2.h @@ -4,34 +4,48 @@ #include "Luau/RegisterX64.h" #include "UnwindBuilder.h" +#include + namespace Luau { namespace CodeGen { +struct UnwindFunctionDwarf2 +{ + uint32_t beginOffset; + uint32_t endOffset; + uint32_t fdeEntryStartPos; +}; + class UnwindBuilderDwarf2 : public UnwindBuilder { public: void setBeginOffset(size_t beginOffset) override; size_t getBeginOffset() const override; - void start() override; + void startInfo() override; + void startFunction() override; void spill(int espOffset, X64::RegisterX64 reg) override; void save(X64::RegisterX64 reg) override; void allocStack(int size) override; void setupFrameReg(X64::RegisterX64 reg, int espOffset) override; + void finishFunction(uint32_t beginOffset, uint32_t endOffset) override; - void finish() override; + void finishInfo() override; size_t getSize() const override; + size_t getFunctionCount() const override; - void finalize(char* target, void* funcAddress, size_t funcSize) const override; + void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override; private: size_t 
beginOffset = 0; - static const unsigned kRawDataLimit = 128; + std::vector unwindFunctions; + + static const unsigned kRawDataLimit = 1024; uint8_t rawData[kRawDataLimit]; uint8_t* pos = rawData; diff --git a/CodeGen/include/Luau/UnwindBuilderWin.h b/CodeGen/include/Luau/UnwindBuilderWin.h index 1cd750a1..ccd7125d 100644 --- a/CodeGen/include/Luau/UnwindBuilderWin.h +++ b/CodeGen/include/Luau/UnwindBuilderWin.h @@ -11,6 +11,25 @@ namespace Luau namespace CodeGen { +// This struct matches the layout of x64 RUNTIME_FUNCTION from winnt.h +struct UnwindFunctionWin +{ + uint32_t beginOffset; + uint32_t endOffset; + uint32_t unwindInfoOffset; +}; + +// This struct matches the layout of x64 UNWIND_INFO from ehdata.h +struct UnwindInfoWin +{ + uint8_t version : 3; + uint8_t flags : 5; + uint8_t prologsize; + uint8_t unwindcodecount; + uint8_t framereg : 4; + uint8_t frameregoff : 4; +}; + // This struct matches the layout of UNWIND_CODE from ehdata.h struct UnwindCodeWin { @@ -25,31 +44,38 @@ public: void setBeginOffset(size_t beginOffset) override; size_t getBeginOffset() const override; - void start() override; + void startInfo() override; + void startFunction() override; void spill(int espOffset, X64::RegisterX64 reg) override; void save(X64::RegisterX64 reg) override; void allocStack(int size) override; void setupFrameReg(X64::RegisterX64 reg, int espOffset) override; + void finishFunction(uint32_t beginOffset, uint32_t endOffset) override; - void finish() override; + void finishInfo() override; size_t getSize() const override; + size_t getFunctionCount() const override; - void finalize(char* target, void* funcAddress, size_t funcSize) const override; + void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override; private: size_t beginOffset = 0; + static const unsigned kRawDataLimit = 1024; + uint8_t rawData[kRawDataLimit]; + uint8_t* rawDataPos = rawData; + + std::vector unwindFunctions; + // Windows unwind codes are written in 
reverse, so we have to collect them all first std::vector unwindCodes; uint8_t prologSize = 0; - X64::RegisterX64 frameReg = X64::rax; // rax means that frame register is not used + X64::RegisterX64 frameReg = X64::noreg; uint8_t frameRegOffset = 0; uint32_t stackOffset = 0; - - size_t infoSize = 0; }; } // namespace CodeGen diff --git a/CodeGen/src/AssemblyBuilderA64.cpp b/CodeGen/src/AssemblyBuilderA64.cpp index a80003e9..bb7c9439 100644 --- a/CodeGen/src/AssemblyBuilderA64.cpp +++ b/CodeGen/src/AssemblyBuilderA64.cpp @@ -1,6 +1,7 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details #include "Luau/AssemblyBuilderA64.h" +#include "BitUtils.h" #include "ByteUtils.h" #include @@ -126,6 +127,15 @@ void AssemblyBuilderA64::csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src placeCS("csel", dst, src1, src2, cond, 0b11010'10'0, 0b00); } +void AssemblyBuilderA64::cset(RegisterA64 dst, ConditionA64 cond) +{ + LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w); + + RegisterA64 src = dst.kind == KindA64::x ? xzr : wzr; + + placeCS("cset", dst, src, src, cond, 0b11010'10'0, 0b01, /* invert= */ 1); +} + void AssemblyBuilderA64::and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) { placeSR3("and", dst, src1, src2, 0b00'01010); @@ -141,11 +151,45 @@ void AssemblyBuilderA64::eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2 placeSR3("eor", dst, src1, src2, 0b10'01010); } +void AssemblyBuilderA64::bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) +{ + placeSR3("bic", dst, src1, src2, 0b00'01010, /* shift= */ 0, /* N= */ 1); +} + +void AssemblyBuilderA64::tst(RegisterA64 src1, RegisterA64 src2) +{ + RegisterA64 dst = src1.kind == KindA64::x ? 
xzr : wzr; + + placeSR3("tst", dst, src1, src2, 0b11'01010); +} + void AssemblyBuilderA64::mvn(RegisterA64 dst, RegisterA64 src) { placeSR2("mvn", dst, src, 0b01'01010, 0b1); } +void AssemblyBuilderA64::and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2) +{ + placeBM("and", dst, src1, src2, 0b00'100100); +} + +void AssemblyBuilderA64::orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2) +{ + placeBM("orr", dst, src1, src2, 0b01'100100); +} + +void AssemblyBuilderA64::eor(RegisterA64 dst, RegisterA64 src1, uint32_t src2) +{ + placeBM("eor", dst, src1, src2, 0b10'100100); +} + +void AssemblyBuilderA64::tst(RegisterA64 src1, uint32_t src2) +{ + RegisterA64 dst = src1.kind == KindA64::x ? xzr : wzr; + + placeBM("tst", dst, src1, src2, 0b11'100100); +} + void AssemblyBuilderA64::lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) { placeR3("lsl", dst, src1, src2, 0b11010110, 0b0010'00); @@ -583,7 +627,7 @@ void AssemblyBuilderA64::place0(const char* name, uint32_t op) commit(); } -void AssemblyBuilderA64::placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift) +void AssemblyBuilderA64::placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift, int N) { if (logText) log(name, dst, src1, src2, shift); @@ -594,7 +638,7 @@ void AssemblyBuilderA64::placeSR3(const char* name, RegisterA64 dst, RegisterA64 uint32_t sf = (dst.kind == KindA64::x) ? 
0x80000000 : 0; - place(dst.index | (src1.index << 5) | (shift << 10) | (src2.index << 16) | (op << 24) | sf); + place(dst.index | (src1.index << 5) | (shift << 10) | (src2.index << 16) | (N << 21) | (op << 24) | sf); commit(); } @@ -764,7 +808,8 @@ void AssemblyBuilderA64::placeP(const char* name, RegisterA64 src1, RegisterA64 commit(); } -void AssemblyBuilderA64::placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc) +void AssemblyBuilderA64::placeCS( + const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert) { if (logText) log(name, dst, src1, src2, cond); @@ -773,7 +818,7 @@ void AssemblyBuilderA64::placeCS(const char* name, RegisterA64 dst, RegisterA64 uint32_t sf = (dst.kind == KindA64::x) ? 0x80000000 : 0; - place(dst.index | (src1.index << 5) | (opc << 10) | (codeForCondition[int(cond)] << 12) | (src2.index << 16) | (op << 21) | sf); + place(dst.index | (src1.index << 5) | (opc << 10) | ((codeForCondition[int(cond)] ^ invert) << 12) | (src2.index << 16) | (op << 21) | sf); commit(); } @@ -793,6 +838,29 @@ void AssemblyBuilderA64::placeFCMP(const char* name, RegisterA64 src1, RegisterA commit(); } +void AssemblyBuilderA64::placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op) +{ + if (logText) + log(name, dst, src1, src2); + + LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x); + LUAU_ASSERT(dst.kind == src1.kind); + + uint32_t sf = (dst.kind == KindA64::x) ? 
0x80000000 : 0; + + int lz = countlz(src2); + int rz = countrz(src2); + + LUAU_ASSERT(lz + rz > 0 && lz + rz < 32); // must have at least one 0 and at least one 1 + LUAU_ASSERT((src2 >> rz) == (1u << (32 - lz - rz)) - 1u); // sequence of 1s must be contiguous + + int imms = 31 - lz - rz; // count of 1s minus 1 + int immr = (32 - rz) & 31; // right rotate amount + + place(dst.index | (src1.index << 5) | (imms << 10) | (immr << 16) | (op << 23) | sf); + commit(); +} + void AssemblyBuilderA64::place(uint32_t word) { LUAU_ASSERT(codePos < codeEnd); @@ -965,10 +1033,13 @@ void AssemblyBuilderA64::log(const char* opcode, RegisterA64 dst, RegisterA64 sr { logAppend(" %-12s", opcode); log(dst); - text.append(","); - log(src1); - text.append(","); - log(src2); + if ((src1 != wzr && src1 != xzr) || (src2 != wzr && src2 != xzr)) + { + text.append(","); + log(src1); + text.append(","); + log(src2); + } text.append(","); text.append(textForCondition[int(cond)] + 2); // skip b. text.append("\n"); diff --git a/CodeGen/src/AssemblyBuilderX64.cpp b/CodeGen/src/AssemblyBuilderX64.cpp index d86a37c6..ed95004f 100644 --- a/CodeGen/src/AssemblyBuilderX64.cpp +++ b/CodeGen/src/AssemblyBuilderX64.cpp @@ -31,7 +31,8 @@ static_assert(sizeof(setccTextForCondition) / sizeof(setccTextForCondition[0]) = #define OP_PLUS_REG(op, reg) ((op) + (reg & 0x7)) #define OP_PLUS_CC(op, cc) ((op) + uint8_t(cc)) -#define REX_W(value) (value ? 0x8 : 0x0) +#define REX_W_BIT(value) (value ? 
0x8 : 0x0) +#define REX_W(reg) REX_W_BIT((reg).size == SizeX64::qword || ((reg).size == SizeX64::byte && (reg).index >= 4)) #define REX_R(reg) (((reg).index & 0x8) >> 1) #define REX_X(reg) (((reg).index & 0x8) >> 2) #define REX_B(reg) (((reg).index & 0x8) >> 3) @@ -1116,7 +1117,7 @@ void AssemblyBuilderX64::placeAvx( void AssemblyBuilderX64::placeRex(RegisterX64 op) { - uint8_t code = REX_W(op.size == SizeX64::qword) | REX_B(op); + uint8_t code = REX_W(op) | REX_B(op); if (code != 0) place(code | 0x40); @@ -1127,9 +1128,9 @@ void AssemblyBuilderX64::placeRex(OperandX64 op) uint8_t code = 0; if (op.cat == CategoryX64::reg) - code = REX_W(op.base.size == SizeX64::qword) | REX_B(op.base); + code = REX_W(op.base) | REX_B(op.base); else if (op.cat == CategoryX64::mem) - code = REX_W(op.memSize == SizeX64::qword) | REX_X(op.index) | REX_B(op.base); + code = REX_W_BIT(op.memSize == SizeX64::qword) | REX_X(op.index) | REX_B(op.base); else LUAU_ASSERT(!"No encoding for left operand of this category"); @@ -1154,7 +1155,7 @@ void AssemblyBuilderX64::placeRexNoW(OperandX64 op) void AssemblyBuilderX64::placeRex(RegisterX64 lhs, OperandX64 rhs) { - uint8_t code = REX_W(lhs.size == SizeX64::qword); + uint8_t code = REX_W(lhs); if (rhs.cat == CategoryX64::imm) code |= REX_B(lhs); diff --git a/CodeGen/src/BitUtils.h b/CodeGen/src/BitUtils.h new file mode 100644 index 00000000..93f7cc8d --- /dev/null +++ b/CodeGen/src/BitUtils.h @@ -0,0 +1,36 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#pragma once + +#include + +#ifdef _MSC_VER +#include +#endif + +namespace Luau +{ +namespace CodeGen +{ + +inline int countlz(uint32_t n) +{ +#ifdef _MSC_VER + unsigned long rl; + return _BitScanReverse(&rl, n) ? 31 - int(rl) : 32; +#else + return n == 0 ? 32 : __builtin_clz(n); +#endif +} + +inline int countrz(uint32_t n) +{ +#ifdef _MSC_VER + unsigned long rl; + return _BitScanForward(&rl, n) ? 
int(rl) : 32; +#else + return n == 0 ? 32 : __builtin_ctz(n); +#endif +} + +} // namespace CodeGen +} // namespace Luau diff --git a/CodeGen/src/CodeBlockUnwind.cpp b/CodeGen/src/CodeBlockUnwind.cpp index 72842be7..ccd15fac 100644 --- a/CodeGen/src/CodeBlockUnwind.cpp +++ b/CodeGen/src/CodeBlockUnwind.cpp @@ -54,31 +54,6 @@ namespace CodeGen void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, size_t& beginOffset) { -#if defined(_WIN32) && defined(_M_X64) - UnwindBuilder* unwind = (UnwindBuilder*)context; - - // All unwinding related data is placed together at the start of the block - size_t unwindSize = sizeof(RUNTIME_FUNCTION) + unwind->getSize(); - unwindSize = (unwindSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1); // Match code allocator alignment - LUAU_ASSERT(blockSize >= unwindSize); - - RUNTIME_FUNCTION* runtimeFunc = (RUNTIME_FUNCTION*)block; - runtimeFunc->BeginAddress = DWORD(unwindSize); // Code will start after the unwind info - runtimeFunc->EndAddress = DWORD(blockSize); // Whole block is a part of a 'single function' - runtimeFunc->UnwindInfoAddress = DWORD(sizeof(RUNTIME_FUNCTION)); // Unwind info is placed at the start of the block - - char* unwindData = (char*)block + runtimeFunc->UnwindInfoAddress; - unwind->finalize(unwindData, block + unwindSize, blockSize - unwindSize); - - if (!RtlAddFunctionTable(runtimeFunc, 1, uintptr_t(block))) - { - LUAU_ASSERT(!"failed to allocate function table"); - return nullptr; - } - - beginOffset = unwindSize + unwind->getBeginOffset(); - return block; -#elif !defined(_WIN32) UnwindBuilder* unwind = (UnwindBuilder*)context; // All unwinding related data is placed together at the start of the block @@ -87,37 +62,34 @@ void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, siz LUAU_ASSERT(blockSize >= unwindSize); char* unwindData = (char*)block; - unwind->finalize(unwindData, block, blockSize); + unwind->finalize(unwindData, unwindSize, block, blockSize); -#if 
defined(__APPLE__) +#if defined(_WIN32) && defined(_M_X64) + if (!RtlAddFunctionTable((RUNTIME_FUNCTION*)block, uint32_t(unwind->getFunctionCount()), uintptr_t(block))) + { + LUAU_ASSERT(!"failed to allocate function table"); + return nullptr; + } +#elif defined(__APPLE__) visitFdeEntries(unwindData, __register_frame); -#else +#elif !defined(_WIN32) __register_frame(unwindData); #endif beginOffset = unwindSize + unwind->getBeginOffset(); return block; -#endif - - return nullptr; } void destroyBlockUnwindInfo(void* context, void* unwindData) { #if defined(_WIN32) && defined(_M_X64) - RUNTIME_FUNCTION* runtimeFunc = (RUNTIME_FUNCTION*)unwindData; - - if (!RtlDeleteFunctionTable(runtimeFunc)) + if (!RtlDeleteFunctionTable((RUNTIME_FUNCTION*)unwindData)) LUAU_ASSERT(!"failed to deallocate function table"); -#elif !defined(_WIN32) - -#if defined(__APPLE__) +#elif defined(__APPLE__) visitFdeEntries((char*)unwindData, __deregister_frame); -#else +#elif !defined(_WIN32) __deregister_frame(unwindData); #endif - -#endif } } // namespace CodeGen diff --git a/CodeGen/src/CodeGen.cpp b/CodeGen/src/CodeGen.cpp index 8e6e9493..6cd9ea05 100644 --- a/CodeGen/src/CodeGen.cpp +++ b/CodeGen/src/CodeGen.cpp @@ -176,6 +176,10 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& IrInst& inst = function.instructions[index]; + // Substitutions might have meta information about operand restore location from memory + if (inst.cmd == IrCmd::SUBSTITUTE && inst.b.kind != IrOpKind::None) + function.recordRestoreOp(inst.a.index, inst.b); + // Skip pseudo instructions, but make sure they are not used at this stage // This also prevents them from getting into text output when that's enabled if (isPseudo(inst.cmd)) @@ -195,7 +199,18 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& lowering.lowerInst(inst, index, next); if (lowering.hasError()) + { + // Place labels for all blocks that we're skipping + // This is needed to avoid 
AssemblyBuilder assertions about jumps in earlier blocks with unplaced labels + for (size_t j = i + 1; j < sortedBlocks.size(); ++j) + { + IrBlock& abandoned = function.blocks[sortedBlocks[j]]; + + build.setLabel(abandoned.label); + } + return false; + } } if (options.includeIr) @@ -223,12 +238,8 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& [[maybe_unused]] static bool lowerIr( X64::AssemblyBuilderX64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options) { - constexpr uint32_t kFunctionAlignment = 32; - optimizeMemoryOperandsX64(ir.function); - build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2); - X64::IrLoweringX64 lowering(build, helpers, data, ir.function); return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options); @@ -237,9 +248,6 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& [[maybe_unused]] static bool lowerIr( A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options) { - if (!A64::IrLoweringA64::canLower(ir.function)) - return false; - A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function); return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options); @@ -432,13 +440,13 @@ void create(lua_State* L) initHelperFunctions(data); #if defined(__x86_64__) || defined(_M_X64) - if (!X64::initEntryFunction(data)) + if (!X64::initHeaderFunctions(data)) { destroyNativeState(L); return; } #elif defined(__aarch64__) - if (!A64::initEntryFunction(data)) + if (!A64::initHeaderFunctions(data)) { destroyNativeState(L); return; diff --git a/CodeGen/src/CodeGenA64.cpp b/CodeGen/src/CodeGenA64.cpp index e7a1e2e2..7f29beb2 100644 --- a/CodeGen/src/CodeGenA64.cpp +++ b/CodeGen/src/CodeGenA64.cpp @@ -17,14 +17,107 @@ namespace CodeGen namespace A64 { -bool initEntryFunction(NativeState& data) +struct EntryLocations { - 
AssemblyBuilderA64 build(/* logText= */ false); - UnwindBuilder& unwind = *data.unwindBuilder.get(); + Label start; + Label prologueEnd; + Label epilogueStart; +}; + +static void emitExit(AssemblyBuilderA64& build, bool continueInVm) +{ + build.mov(x0, continueInVm); + build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, gateExit))); + build.br(x1); +} + +static void emitInterrupt(AssemblyBuilderA64& build) +{ + // x0 = pc offset + // x1 = return address in native code + // x2 = interrupt + + // Stash return address in rBase; we need to reload rBase anyway + build.mov(rBase, x1); + + // Update savedpc; required in case interrupt errors + build.add(x0, rCode, x0); + build.ldr(x1, mem(rState, offsetof(lua_State, ci))); + build.str(x0, mem(x1, offsetof(CallInfo, savedpc))); + + // Call interrupt + build.mov(x0, rState); + build.mov(w1, -1); + build.blr(x2); + + // Check if we need to exit + Label skip; + build.ldrb(w0, mem(rState, offsetof(lua_State, status))); + build.cbz(w0, skip); + + // L->ci->savedpc-- + // note: recomputing this avoids having to stash x0 + build.ldr(x1, mem(rState, offsetof(lua_State, ci))); + build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc))); + build.sub(x0, x0, sizeof(Instruction)); + build.str(x0, mem(x1, offsetof(CallInfo, savedpc))); + + emitExit(build, /* continueInVm */ false); + + build.setLabel(skip); + + // Return back to caller; rBase has stashed return address + build.mov(x0, rBase); + + emitUpdateBase(build); // interrupt may have reallocated stack + + build.br(x0); +} + +static void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers) +{ + // x0 = closure object to reentry (equal to clvalue(L->ci->func)) + + // If the fallback requested an exit, we need to do this right away + build.cbz(x0, helpers.exitNoContinueVm); + + emitUpdateBase(build); + + // Need to update state of the current function before we jump away + build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto + + build.mov(rClosure, x0); + 
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k + build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code + + // Get instruction index from instruction pointer + // To get instruction index from instruction pointer, we need to divide byte offset by 4 + // But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out + build.ldr(x2, mem(rState, offsetof(lua_State, ci))); // L->ci + build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // L->ci->savedpc + build.sub(x2, x2, rCode); + build.add(x2, x2, x2); // TODO: this would not be necessary if we supported shifted register offsets in loads + + // We need to check if the new function can be executed natively + // TODO: This can be done earlier in the function flow, to reduce the JIT->VM transition penalty + build.ldr(x1, mem(x1, offsetofProtoExecData)); + build.cbz(x1, helpers.exitContinueVm); + + // Get new instruction location and jump to it + build.ldr(x1, mem(x1, offsetof(NativeProto, instTargets))); + build.ldr(x1, mem(x1, x2)); + build.br(x1); +} + +static EntryLocations buildEntryFunction(AssemblyBuilderA64& build, UnwindBuilder& unwind) +{ + EntryLocations locations; // Arguments: x0 = lua_State*, x1 = Proto*, x2 = native code pointer to jump to, x3 = NativeContext* - unwind.start(); + locations.start = build.setLabel(); + unwind.startFunction(); + unwind.allocStack(8); // TODO: this is just a hack to make UnwindBuilder assertions cooperate // prologue @@ -38,9 +131,7 @@ bool initEntryFunction(NativeState& data) build.mov(x29, sp); // this is only necessary if we maintain frame pointers, which we do in the JIT for now - unwind.finish(); - - size_t prologueSize = build.setLabel().location; + locations.prologueEnd = build.setLabel(); // Setup native execution environment build.mov(rState, x0); @@ -58,7 +149,7 @@ bool initEntryFunction(NativeState& data) build.br(x2); // Even though we jumped away, we will return here in the end - Label returnOff = 
build.setLabel(); + locations.epilogueStart = build.setLabel(); // Cleanup and exit build.ldp(x23, x24, mem(sp, 48)); @@ -69,12 +160,30 @@ bool initEntryFunction(NativeState& data) build.ret(); + // Our entry function is special, it spans the whole remaining code area + unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFuncton); + + return locations; +} + +bool initHeaderFunctions(NativeState& data) +{ + AssemblyBuilderA64 build(/* logText= */ false); + UnwindBuilder& unwind = *data.unwindBuilder.get(); + + unwind.startInfo(); + + EntryLocations entryLocations = buildEntryFunction(build, unwind); + build.finalize(); + unwind.finishInfo(); + LUAU_ASSERT(build.data.empty()); + uint8_t* codeStart = nullptr; if (!data.codeAllocator.allocate(build.data.data(), int(build.data.size()), reinterpret_cast(build.code.data()), - int(build.code.size() * sizeof(build.code[0])), data.gateData, data.gateDataSize, data.context.gateEntry)) + int(build.code.size() * sizeof(build.code[0])), data.gateData, data.gateDataSize, codeStart)) { LUAU_ASSERT(!"failed to create entry function"); return false; @@ -82,9 +191,10 @@ bool initEntryFunction(NativeState& data) // Set the offset at the begining so that functions in new blocks will not overlay the locations // specified by the unwind information of the entry function - unwind.setBeginOffset(prologueSize); + unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd)); - data.context.gateExit = data.context.gateEntry + build.getLabelOffset(returnOff); + data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start); + data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart); return true; } diff --git a/CodeGen/src/CodeGenA64.h b/CodeGen/src/CodeGenA64.h index 7b792cc1..f6fda726 100644 --- a/CodeGen/src/CodeGenA64.h +++ b/CodeGen/src/CodeGenA64.h @@ -14,7 +14,7 @@ namespace A64 class AssemblyBuilderA64; -bool initEntryFunction(NativeState& data); +bool 
initHeaderFunctions(NativeState& data); void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers); } // namespace A64 diff --git a/CodeGen/src/CodeGenUtils.cpp b/CodeGen/src/CodeGenUtils.cpp index ae3dbd45..7a9192ab 100644 --- a/CodeGen/src/CodeGenUtils.cpp +++ b/CodeGen/src/CodeGenUtils.cpp @@ -13,12 +13,58 @@ namespace Luau namespace CodeGen { +bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra) +{ + int sizearray = h->sizearray; + + // first we advance index through the array portion + while (unsigned(index) < unsigned(sizearray)) + { + TValue* e = &h->array[index]; + + if (!ttisnil(e)) + { + setpvalue(ra + 2, reinterpret_cast(uintptr_t(index + 1))); + setnvalue(ra + 3, double(index + 1)); + setobj2s(L, ra + 4, e); + + return true; + } + + index++; + } + + int sizenode = 1 << h->lsizenode; + + // then we advance index through the hash portion + while (unsigned(index - h->sizearray) < unsigned(sizenode)) + { + LuaNode* n = &h->node[index - sizearray]; + + if (!ttisnil(gval(n))) + { + setpvalue(ra + 2, reinterpret_cast(uintptr_t(index + 1))); + getnodekey(L, ra + 3, n); + setobj(L, ra + 4, gval(n)); + + return true; + } + + index++; + } + + return false; +} + bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra) { + int sizearray = h->sizearray; + int sizenode = 1 << h->lsizenode; + // then we advance index through the hash portion - while (unsigned(index - h->sizearray) < unsigned(1 << h->lsizenode)) + while (unsigned(index - sizearray) < unsigned(sizenode)) { - LuaNode* n = &h->node[index - h->sizearray]; + LuaNode* n = &h->node[index - sizearray]; if (!ttisnil(gval(n))) { diff --git a/CodeGen/src/CodeGenUtils.h b/CodeGen/src/CodeGenUtils.h index 6066a691..10e88c13 100644 --- a/CodeGen/src/CodeGenUtils.h +++ b/CodeGen/src/CodeGenUtils.h @@ -8,6 +8,7 @@ namespace Luau namespace CodeGen { +bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra); bool forgLoopNodeIter(lua_State* L, Table* h, int index, 
TValue* ra); bool forgLoopNonTableFallback(lua_State* L, int insnA, int aux); diff --git a/CodeGen/src/CodeGenX64.cpp b/CodeGen/src/CodeGenX64.cpp index 7df1a909..2acb69f9 100644 --- a/CodeGen/src/CodeGenX64.cpp +++ b/CodeGen/src/CodeGenX64.cpp @@ -41,12 +41,21 @@ namespace CodeGen namespace X64 { -bool initEntryFunction(NativeState& data) +struct EntryLocations { - AssemblyBuilderX64 build(/* logText= */ false); - UnwindBuilder& unwind = *data.unwindBuilder.get(); + Label start; + Label prologueEnd; + Label epilogueStart; +}; - unwind.start(); +static EntryLocations buildEntryFunction(AssemblyBuilderX64& build, UnwindBuilder& unwind) +{ + EntryLocations locations; + + build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2); + + locations.start = build.setLabel(); + unwind.startFunction(); // Save common non-volatile registers build.push(rbp); @@ -84,9 +93,7 @@ bool initEntryFunction(NativeState& data) build.sub(rsp, kStackSize + kLocalsSize); unwind.allocStack(kStackSize + kLocalsSize); - unwind.finish(); - - size_t prologueSize = build.setLabel().location; + locations.prologueEnd = build.setLabel(); // Setup native execution environment build.mov(rState, rArg1); @@ -104,7 +111,7 @@ bool initEntryFunction(NativeState& data) build.jmp(rArg3); // Even though we jumped away, we will return here in the end - Label returnOff = build.setLabel(); + locations.epilogueStart = build.setLabel(); // Cleanup and exit build.add(rsp, kStackSize + kLocalsSize); @@ -123,12 +130,30 @@ bool initEntryFunction(NativeState& data) build.pop(rbp); build.ret(); + // Our entry function is special, it spans the whole remaining code area + unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFuncton); + + return locations; +} + +bool initHeaderFunctions(NativeState& data) +{ + AssemblyBuilderX64 build(/* logText= */ false); + UnwindBuilder& unwind = *data.unwindBuilder.get(); + + unwind.startInfo(); + + EntryLocations entryLocations = buildEntryFunction(build, unwind); 
+ build.finalize(); + unwind.finishInfo(); + LUAU_ASSERT(build.data.empty()); - if (!data.codeAllocator.allocate(build.data.data(), int(build.data.size()), build.code.data(), int(build.code.size()), data.gateData, - data.gateDataSize, data.context.gateEntry)) + uint8_t* codeStart = nullptr; + if (!data.codeAllocator.allocate( + build.data.data(), int(build.data.size()), build.code.data(), int(build.code.size()), data.gateData, data.gateDataSize, codeStart)) { LUAU_ASSERT(!"failed to create entry function"); return false; @@ -136,9 +161,10 @@ bool initEntryFunction(NativeState& data) // Set the offset at the begining so that functions in new blocks will not overlay the locations // specified by the unwind information of the entry function - unwind.setBeginOffset(prologueSize); + unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd)); - data.context.gateExit = data.context.gateEntry + returnOff.location; + data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start); + data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart); return true; } diff --git a/CodeGen/src/CodeGenX64.h b/CodeGen/src/CodeGenX64.h index 1f483113..1f0f27d9 100644 --- a/CodeGen/src/CodeGenX64.h +++ b/CodeGen/src/CodeGenX64.h @@ -14,7 +14,7 @@ namespace X64 class AssemblyBuilderX64; -bool initEntryFunction(NativeState& data); +bool initHeaderFunctions(NativeState& data); void assembleHelpers(AssemblyBuilderX64& build, ModuleHelpers& helpers); } // namespace X64 diff --git a/CodeGen/src/EmitBuiltinsX64.cpp b/CodeGen/src/EmitBuiltinsX64.cpp index b010ce62..4026b955 100644 --- a/CodeGen/src/EmitBuiltinsX64.cpp +++ b/CodeGen/src/EmitBuiltinsX64.cpp @@ -107,47 +107,11 @@ void emitBuiltinMathLog(IrRegAllocX64& regs, AssemblyBuilderX64& build, int npar regs.assertAllFree(); build.vmovsd(xmm0, luauRegValue(arg)); - if (nparams == 1) - { - build.call(qword[rNativeContext + offsetof(NativeContext, libm_log)]); - } - else - { - Label 
log10check, logdivlog, exit; - - // Using 'rbx' for non-volatile temporary storage of log(arg1) result - RegisterX64 tmp = rbx; - OperandX64 arg2value = qword[args + offsetof(TValue, value)]; - - build.vmovsd(xmm1, arg2value); - - jumpOnNumberCmp(build, noreg, build.f64(2.0), xmm1, IrCondition::NotEqual, log10check); - + // TODO: IR builtin lowering assumes that the only valid 2-argument call is log2; ideally, we use a less hacky way to indicate that + if (nparams == 2) build.call(qword[rNativeContext + offsetof(NativeContext, libm_log2)]); - build.jmp(exit); - - build.setLabel(log10check); - jumpOnNumberCmp(build, noreg, build.f64(10.0), xmm1, IrCondition::NotEqual, logdivlog); - - build.call(qword[rNativeContext + offsetof(NativeContext, libm_log10)]); - build.jmp(exit); - - build.setLabel(logdivlog); - - // log(arg1) + else build.call(qword[rNativeContext + offsetof(NativeContext, libm_log)]); - build.vmovq(tmp, xmm0); - - // log(arg2) - build.vmovsd(xmm0, arg2value); - build.call(qword[rNativeContext + offsetof(NativeContext, libm_log)]); - - // log(arg1) / log(arg2) - build.vmovq(xmm1, tmp); - build.vdivsd(xmm0, xmm1, xmm0); - - build.setLabel(exit); - } build.vmovsd(luauRegValue(ra), xmm0); } @@ -256,62 +220,68 @@ void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int r switch (bfid) { - case LBF_ASSERT: - case LBF_MATH_DEG: - case LBF_MATH_RAD: - case LBF_MATH_MIN: - case LBF_MATH_MAX: - case LBF_MATH_CLAMP: - case LBF_MATH_FLOOR: - case LBF_MATH_CEIL: - case LBF_MATH_SQRT: - case LBF_MATH_POW: - case LBF_MATH_ABS: - case LBF_MATH_ROUND: - // These instructions are fully translated to IR - break; case LBF_MATH_EXP: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathExp(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_FMOD: + LUAU_ASSERT(nparams == 2 && nresults == 1); return emitBuiltinMathFmod(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_ASIN: + LUAU_ASSERT(nparams == 1 && nresults 
== 1); return emitBuiltinMathAsin(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_SIN: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathSin(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_SINH: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathSinh(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_ACOS: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathAcos(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_COS: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathCos(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_COSH: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathCosh(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_ATAN: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathAtan(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_TAN: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathTan(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_TANH: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathTanh(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_ATAN2: + LUAU_ASSERT(nparams == 2 && nresults == 1); return emitBuiltinMathAtan2(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_LOG10: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathLog10(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_LOG: + LUAU_ASSERT((nparams == 1 || nparams == 2) && nresults == 1); return emitBuiltinMathLog(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_LDEXP: + LUAU_ASSERT(nparams == 2 && nresults == 1); return emitBuiltinMathLdexp(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_FREXP: + LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2)); return emitBuiltinMathFrexp(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_MODF: 
+ LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2)); return emitBuiltinMathModf(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_MATH_SIGN: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinMathSign(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_TYPE: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinType(regs, build, nparams, ra, arg, argsOp, nresults); case LBF_TYPEOF: + LUAU_ASSERT(nparams == 1 && nresults == 1); return emitBuiltinTypeof(regs, build, nparams, ra, arg, argsOp, nresults); default: - LUAU_ASSERT(!"missing x64 lowering"); + LUAU_ASSERT(!"Missing x64 lowering"); break; } } diff --git a/CodeGen/src/EmitCommon.h b/CodeGen/src/EmitCommon.h index a71eafd4..6a749669 100644 --- a/CodeGen/src/EmitCommon.h +++ b/CodeGen/src/EmitCommon.h @@ -13,8 +13,8 @@ constexpr unsigned kLuaNodeSizeLog2 = 5; constexpr unsigned kLuaNodeTagMask = 0xf; constexpr unsigned kNextBitOffset = 4; -constexpr unsigned kOffsetOfLuaNodeTag = 12; // offsetof cannot be used on a bit field -constexpr unsigned kOffsetOfLuaNodeNext = 12; // offsetof cannot be used on a bit field +constexpr unsigned kOffsetOfTKeyTag = 12; // offsetof cannot be used on a bit field +constexpr unsigned kOffsetOfTKeyNext = 12; // offsetof cannot be used on a bit field constexpr unsigned kOffsetOfInstructionC = 3; // Leaf functions that are placed in every module to perform common instruction sequences diff --git a/CodeGen/src/EmitCommonA64.cpp b/CodeGen/src/EmitCommonA64.cpp deleted file mode 100644 index 1758e4fb..00000000 --- a/CodeGen/src/EmitCommonA64.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details -#include "EmitCommonA64.h" - -#include "NativeState.h" -#include "CustomExecUtils.h" - -namespace Luau -{ -namespace CodeGen -{ -namespace A64 -{ - -void emitUpdateBase(AssemblyBuilderA64& build) -{ - build.ldr(rBase, mem(rState, 
offsetof(lua_State, base))); -} - -void emitExit(AssemblyBuilderA64& build, bool continueInVm) -{ - build.mov(x0, continueInVm); - build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, gateExit))); - build.br(x1); -} - -void emitInterrupt(AssemblyBuilderA64& build) -{ - // x0 = pc offset - // x1 = return address in native code - // x2 = interrupt - - // Stash return address in rBase; we need to reload rBase anyway - build.mov(rBase, x1); - - // Update savedpc; required in case interrupt errors - build.add(x0, rCode, x0); - build.ldr(x1, mem(rState, offsetof(lua_State, ci))); - build.str(x0, mem(x1, offsetof(CallInfo, savedpc))); - - // Call interrupt - build.mov(x0, rState); - build.mov(w1, -1); - build.blr(x2); - - // Check if we need to exit - Label skip; - build.ldrb(w0, mem(rState, offsetof(lua_State, status))); - build.cbz(w0, skip); - - // L->ci->savedpc-- - // note: recomputing this avoids having to stash x0 - build.ldr(x1, mem(rState, offsetof(lua_State, ci))); - build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc))); - build.sub(x0, x0, sizeof(Instruction)); - build.str(x0, mem(x1, offsetof(CallInfo, savedpc))); - - emitExit(build, /* continueInVm */ false); - - build.setLabel(skip); - - // Return back to caller; rBase has stashed return address - build.mov(x0, rBase); - - emitUpdateBase(build); // interrupt may have reallocated stack - - build.br(x0); -} - -void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers) -{ - // x0 = closure object to reentry (equal to clvalue(L->ci->func)) - - // If the fallback requested an exit, we need to do this right away - build.cbz(x0, helpers.exitNoContinueVm); - - emitUpdateBase(build); - - // Need to update state of the current function before we jump away - build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto - - build.mov(rClosure, x0); - build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k - build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code - - // Get instruction 
index from instruction pointer - // To get instruction index from instruction pointer, we need to divide byte offset by 4 - // But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out - build.ldr(x2, mem(rState, offsetof(lua_State, ci))); // L->ci - build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // L->ci->savedpc - build.sub(x2, x2, rCode); - build.add(x2, x2, x2); // TODO: this would not be necessary if we supported shifted register offsets in loads - - // We need to check if the new function can be executed natively - // TODO: This can be done earlier in the function flow, to reduce the JIT->VM transition penalty - build.ldr(x1, mem(x1, offsetofProtoExecData)); - build.cbz(x1, helpers.exitContinueVm); - - // Get new instruction location and jump to it - build.ldr(x1, mem(x1, offsetof(NativeProto, instTargets))); - build.ldr(x1, mem(x1, x2)); - build.br(x1); -} - -void emitFallback(AssemblyBuilderA64& build, int op, int pcpos) -{ - // fallback(L, instruction, base, k) - build.mov(x0, rState); - - // TODO: refactor into a common helper - if (pcpos * sizeof(Instruction) <= AssemblyBuilderA64::kMaxImmediate) - { - build.add(x1, rCode, uint16_t(pcpos * sizeof(Instruction))); - } - else - { - build.mov(x1, pcpos * sizeof(Instruction)); - build.add(x1, rCode, x1); - } - - build.mov(x2, rBase); - build.mov(x3, rConstants); - build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, fallback) + op * sizeof(NativeFallback) + offsetof(NativeFallback, fallback))); - build.blr(x4); - - emitUpdateBase(build); -} - -} // namespace A64 -} // namespace CodeGen -} // namespace Luau diff --git a/CodeGen/src/EmitCommonA64.h b/CodeGen/src/EmitCommonA64.h index 2a65afa8..8cb54c1d 100644 --- a/CodeGen/src/EmitCommonA64.h +++ b/CodeGen/src/EmitCommonA64.h @@ -7,6 +7,7 @@ #include "lobject.h" #include "ltm.h" +#include "lstate.h" // AArch64 ABI reminder: // Arguments: x0-x7, v0-v7 @@ -38,15 +39,19 @@ constexpr RegisterA64 rBase = x24; // 
StkId base // Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point // See CodeGenA64.cpp for layout -constexpr unsigned kStackSize = 64; // 8 stashed registers +constexpr unsigned kStashSlots = 8; // stashed non-volatile registers +constexpr unsigned kSpillSlots = 0; // slots for spilling temporary registers (unused) +constexpr unsigned kTempSlots = 2; // 16 bytes of temporary space, such luxury! -void emitUpdateBase(AssemblyBuilderA64& build); +constexpr unsigned kStackSize = (kStashSlots + kSpillSlots + kTempSlots) * 8; -// TODO: Move these to CodeGenA64 so that they can't be accidentally called during lowering -void emitExit(AssemblyBuilderA64& build, bool continueInVm); -void emitInterrupt(AssemblyBuilderA64& build); -void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers); -void emitFallback(AssemblyBuilderA64& build, int op, int pcpos); +constexpr AddressA64 sSpillArea = mem(sp, kStashSlots * 8); +constexpr AddressA64 sTemporary = mem(sp, (kStashSlots + kSpillSlots) * 8); + +inline void emitUpdateBase(AssemblyBuilderA64& build) +{ + build.ldr(rBase, mem(rState, offsetof(lua_State, base))); +} } // namespace A64 } // namespace CodeGen diff --git a/CodeGen/src/EmitCommonX64.cpp b/CodeGen/src/EmitCommonX64.cpp index 9136add8..b6d8b85e 100644 --- a/CodeGen/src/EmitCommonX64.cpp +++ b/CodeGen/src/EmitCommonX64.cpp @@ -279,32 +279,37 @@ void emitUpdateBase(AssemblyBuilderX64& build) build.mov(rBase, qword[rState + offsetof(lua_State, base)]); } -// Note: only uses rax/rdx, the caller may use other registers -static void emitSetSavedPc(AssemblyBuilderX64& build, int pcpos) +static void emitSetSavedPc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos) { - build.mov(rdx, sCode); - build.add(rdx, pcpos * sizeof(Instruction)); - build.mov(rax, qword[rState + offsetof(lua_State, ci)]); - build.mov(qword[rax + offsetof(CallInfo, savedpc)], rdx); + ScopedRegX64 tmp1{regs, 
SizeX64::qword}; + ScopedRegX64 tmp2{regs, SizeX64::qword}; + + build.mov(tmp1.reg, sCode); + build.add(tmp1.reg, pcpos * sizeof(Instruction)); + build.mov(tmp2.reg, qword[rState + offsetof(lua_State, ci)]); + build.mov(qword[tmp2.reg + offsetof(CallInfo, savedpc)], tmp1.reg); } -void emitInterrupt(AssemblyBuilderX64& build, int pcpos) +void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos) { Label skip; + ScopedRegX64 tmp{regs, SizeX64::qword}; + // Skip if there is no interrupt set - build.mov(r8, qword[rState + offsetof(lua_State, global)]); - build.mov(r8, qword[r8 + offsetof(global_State, cb.interrupt)]); - build.test(r8, r8); + build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]); + build.mov(tmp.reg, qword[tmp.reg + offsetof(global_State, cb.interrupt)]); + build.test(tmp.reg, tmp.reg); build.jcc(ConditionX64::Zero, skip); - emitSetSavedPc(build, pcpos + 1); // uses rax/rdx + emitSetSavedPc(regs, build, pcpos + 1); // Call interrupt // TODO: This code should move to the end of the function, or even be outlined so that it can be shared by multiple interruptible instructions - build.mov(rArg1, rState); - build.mov(dwordReg(rArg2), -1); // function accepts 'int' here and using qword reg would've forced 8 byte constant here - build.call(r8); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::dword, -1); + callWrap.call(tmp.release()); emitUpdateBase(build); // interrupt may have reallocated stack @@ -320,41 +325,23 @@ void emitInterrupt(AssemblyBuilderX64& build, int pcpos) build.setLabel(skip); } -void emitFallback(AssemblyBuilderX64& build, NativeState& data, int op, int pcpos) +void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, NativeState& data, int op, int pcpos) { - NativeFallback& opinfo = data.context.fallback[op]; - LUAU_ASSERT(opinfo.fallback); - - if (build.logText) - build.logAppend("; fallback\n"); + 
LUAU_ASSERT(data.context.fallback[op]); // fallback(L, instruction, base, k) - build.mov(rArg1, rState); - build.mov(rArg2, sCode); - build.add(rArg2, pcpos * sizeof(Instruction)); - build.mov(rArg3, rBase); - build.mov(rArg4, rConstants); - build.call(qword[rNativeContext + offsetof(NativeContext, fallback) + op * sizeof(NativeFallback) + offsetof(NativeFallback, fallback)]); + IrCallWrapperX64 callWrap(regs, build); + callWrap.addArgument(SizeX64::qword, rState); + + RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword); + build.mov(reg, sCode); + callWrap.addArgument(SizeX64::qword, addr[reg + pcpos * sizeof(Instruction)]); + + callWrap.addArgument(SizeX64::qword, rBase); + callWrap.addArgument(SizeX64::qword, rConstants); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, fallback) + op * sizeof(FallbackFn)]); emitUpdateBase(build); - - // Some instructions may jump to a different instruction or a completely different function - if (opinfo.flags & kFallbackUpdatePc) - { - build.mov(rcx, sClosure); - build.mov(rcx, qword[rcx + offsetof(Closure, l.p)]); - - // Get instruction index from returned instruction pointer - // To get instruction index from instruction pointer, we need to divide byte offset by 4 - // But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out - build.sub(rax, sCode); - - build.mov(rdx, qword[rcx + offsetofProtoExecData]); - - // Get new instruction location and jump to it - build.mov(rcx, qword[rdx + offsetof(NativeProto, instTargets)]); - build.jmp(qword[rax * 2 + rcx]); - } } void emitContinueCallInVm(AssemblyBuilderX64& build) diff --git a/CodeGen/src/EmitCommonX64.h b/CodeGen/src/EmitCommonX64.h index 6aac5a1e..d4684fe8 100644 --- a/CodeGen/src/EmitCommonX64.h +++ b/CodeGen/src/EmitCommonX64.h @@ -34,6 +34,8 @@ namespace X64 struct IrRegAllocX64; +constexpr uint32_t kFunctionAlignment = 32; + // Data that is very common to access is placed in non-volatile 
registers constexpr RegisterX64 rState = r15; // lua_State* L constexpr RegisterX64 rBase = r14; // StkId base @@ -134,7 +136,7 @@ inline OperandX64 luauNodeKeyValue(RegisterX64 node) // Note: tag has dirty upper bits inline OperandX64 luauNodeKeyTag(RegisterX64 node) { - return dword[node + offsetof(LuaNode, key) + kOffsetOfLuaNodeTag]; + return dword[node + offsetof(LuaNode, key) + kOffsetOfTKeyTag]; } inline OperandX64 luauNodeValue(RegisterX64 node) @@ -162,12 +164,6 @@ inline void jumpIfTagIsNot(AssemblyBuilderX64& build, int ri, lua_Type tag, Labe build.jcc(ConditionX64::NotEqual, label); } -inline void jumpIfTagIsNot(AssemblyBuilderX64& build, RegisterX64 reg, lua_Type tag, Label& label) -{ - build.cmp(dword[reg + offsetof(TValue, tt)], tag); - build.jcc(ConditionX64::NotEqual, label); -} - // Note: fallthrough label should be placed after this condition inline void jumpIfFalsy(AssemblyBuilderX64& build, int ri, Label& target, Label& fallthrough) { @@ -188,26 +184,6 @@ inline void jumpIfTruthy(AssemblyBuilderX64& build, int ri, Label& target, Label build.jcc(ConditionX64::NotEqual, target); // true if boolean value is 'true' } -inline void jumpIfMetatablePresent(AssemblyBuilderX64& build, RegisterX64 table, Label& target) -{ - build.cmp(qword[table + offsetof(Table, metatable)], 0); - build.jcc(ConditionX64::NotEqual, target); -} - -inline void jumpIfUnsafeEnv(AssemblyBuilderX64& build, RegisterX64 tmp, Label& label) -{ - build.mov(tmp, sClosure); - build.mov(tmp, qword[tmp + offsetof(Closure, env)]); - build.test(byte[tmp + offsetof(Table, safeenv)], 1); - build.jcc(ConditionX64::Zero, label); // Not a safe environment -} - -inline void jumpIfTableIsReadOnly(AssemblyBuilderX64& build, RegisterX64 table, Label& label) -{ - build.cmp(byte[table + offsetof(Table, readonly)], 0); - build.jcc(ConditionX64::NotEqual, label); -} - inline void jumpIfNodeKeyTagIsNot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, lua_Type tag, Label& label) { tmp.size 
= SizeX64::dword; @@ -224,13 +200,6 @@ inline void jumpIfNodeValueTagIs(AssemblyBuilderX64& build, RegisterX64 node, lu build.jcc(ConditionX64::Equal, label); } -inline void jumpIfNodeHasNext(AssemblyBuilderX64& build, RegisterX64 node, Label& label) -{ - build.mov(ecx, dword[node + offsetof(LuaNode, key) + kOffsetOfLuaNodeNext]); - build.shr(ecx, kNextBitOffset); - build.jcc(ConditionX64::NotZero, label); -} - inline void jumpIfNodeKeyNotInExpectedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, OperandX64 expectedKey, Label& label) { jumpIfNodeKeyTagIsNot(build, tmp, node, LUA_TSTRING, label); @@ -260,8 +229,8 @@ void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build); void emitExit(AssemblyBuilderX64& build, bool continueInVm); void emitUpdateBase(AssemblyBuilderX64& build); -void emitInterrupt(AssemblyBuilderX64& build, int pcpos); -void emitFallback(AssemblyBuilderX64& build, NativeState& data, int op, int pcpos); +void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos); +void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, NativeState& data, int op, int pcpos); void emitContinueCallInVm(AssemblyBuilderX64& build); diff --git a/CodeGen/src/EmitInstructionA64.cpp b/CodeGen/src/EmitInstructionA64.cpp deleted file mode 100644 index 400ba77e..00000000 --- a/CodeGen/src/EmitInstructionA64.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details -#include "EmitInstructionA64.h" - -#include "Luau/AssemblyBuilderA64.h" - -#include "EmitCommonA64.h" -#include "NativeState.h" -#include "CustomExecUtils.h" - -namespace Luau -{ -namespace CodeGen -{ -namespace A64 -{ - -void emitInstReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int n) -{ - // callFallback(L, ra, n) - build.mov(x0, rState); - build.add(x1, rBase, uint16_t(ra * sizeof(TValue))); - build.mov(w2, n); - build.ldr(x3, mem(rNativeContext, 
offsetof(NativeContext, returnFallback))); - build.blr(x3); - - // reentry with x0=closure (NULL will trigger exit) - build.b(helpers.reentry); -} - -void emitInstCall(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults) -{ - // argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams; - if (nparams == LUA_MULTRET) - build.ldr(x2, mem(rState, offsetof(lua_State, top))); - else - build.add(x2, rBase, uint16_t((ra + 1 + nparams) * sizeof(TValue))); - - // callFallback(L, ra, argtop, nresults) - build.mov(x0, rState); - build.add(x1, rBase, uint16_t(ra * sizeof(TValue))); - build.mov(w3, nresults); - build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback))); - build.blr(x4); - - // reentry with x0=closure (NULL will trigger exit) - build.b(helpers.reentry); -} - -void emitInstGetImport(AssemblyBuilderA64& build, int ra, uint32_t aux) -{ - // luaV_getimport(L, cl->env, k, aux, /* propagatenil= */ false) - build.mov(x0, rState); - build.ldr(x1, mem(rClosure, offsetof(Closure, env))); - build.mov(x2, rConstants); - build.mov(w3, aux); - build.mov(w4, 0); - build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_getimport))); - build.blr(x5); - - emitUpdateBase(build); - - // setobj2s(L, ra, L->top - 1) - build.ldr(x0, mem(rState, offsetof(lua_State, top))); - build.sub(x0, x0, sizeof(TValue)); - build.ldr(q0, x0); - build.str(q0, mem(rBase, ra * sizeof(TValue))); - - // L->top-- - build.str(x0, mem(rState, offsetof(lua_State, top))); -} - -} // namespace A64 -} // namespace CodeGen -} // namespace Luau diff --git a/CodeGen/src/EmitInstructionA64.h b/CodeGen/src/EmitInstructionA64.h deleted file mode 100644 index 278d8e8e..00000000 --- a/CodeGen/src/EmitInstructionA64.h +++ /dev/null @@ -1,24 +0,0 @@ -// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details -#pragma once - -#include - -namespace Luau -{ -namespace CodeGen -{ - -struct ModuleHelpers; - 
-namespace A64 -{ - -class AssemblyBuilderA64; - -void emitInstReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int n); -void emitInstCall(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults); -void emitInstGetImport(AssemblyBuilderA64& build, int ra, uint32_t aux); - -} // namespace A64 -} // namespace CodeGen -} // namespace Luau diff --git a/CodeGen/src/EmitInstructionX64.cpp b/CodeGen/src/EmitInstructionX64.cpp index c0a64274..9a10bfdc 100644 --- a/CodeGen/src/EmitInstructionX64.cpp +++ b/CodeGen/src/EmitInstructionX64.cpp @@ -415,7 +415,7 @@ void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int callBarrierTableFast(regs, build, table, {}); } -void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat, Label& loopExit) +void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat) { // ipairs-style traversal is handled in IR LUAU_ASSERT(aux >= 0); @@ -484,78 +484,6 @@ void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRep build.jcc(ConditionX64::NotZero, loopRepeat); } -void emitinstForGLoopFallback(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat) -{ - build.mov(rArg1, rState); - build.mov(dwordReg(rArg2), ra); - build.mov(dwordReg(rArg3), aux); - build.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNonTableFallback)]); - emitUpdateBase(build); - build.test(al, al); - build.jcc(ConditionX64::NotZero, loopRepeat); -} - -void emitInstForGPrepXnextFallback(AssemblyBuilderX64& build, int pcpos, int ra, Label& target) -{ - build.mov(rArg1, rState); - build.lea(rArg2, luauRegAddress(ra)); - build.mov(dwordReg(rArg3), pcpos + 1); - build.call(qword[rNativeContext + offsetof(NativeContext, forgPrepXnextFallback)]); - build.jmp(target); -} - -void emitInstGetImportFallback(AssemblyBuilderX64& build, int ra, uint32_t aux) -{ - build.mov(rax, sClosure); - - // luaV_getimport(L, cl->env, k, aux, /* 
propagatenil= */ false) - build.mov(rArg1, rState); - build.mov(rArg2, qword[rax + offsetof(Closure, env)]); - build.mov(rArg3, rConstants); - build.mov(dwordReg(rArg4), aux); - - if (build.abi == ABIX64::Windows) - build.mov(sArg5, 0); - else - build.xor_(rArg5, rArg5); - - build.call(qword[rNativeContext + offsetof(NativeContext, luaV_getimport)]); - - emitUpdateBase(build); - - // setobj2s(L, ra, L->top - 1) - build.mov(rax, qword[rState + offsetof(lua_State, top)]); - build.sub(rax, sizeof(TValue)); - build.vmovups(xmm0, xmmword[rax]); - build.vmovups(luauReg(ra), xmm0); - - // L->top-- - build.mov(qword[rState + offsetof(lua_State, top)], rax); -} - -void emitInstCoverage(AssemblyBuilderX64& build, int pcpos) -{ - build.mov(rcx, sCode); - build.add(rcx, pcpos * sizeof(Instruction)); - - // hits = LUAU_INSN_E(*pc) - build.mov(edx, dword[rcx]); - build.sar(edx, 8); - - // hits = (hits < (1 << 23) - 1) ? hits + 1 : hits; - build.xor_(eax, eax); - build.cmp(edx, (1 << 23) - 1); - build.setcc(ConditionX64::NotEqual, al); - build.add(edx, eax); - - - // VM_PATCH_E(pc, hits); - build.sal(edx, 8); - build.movzx(eax, byte[rcx]); - build.or_(eax, edx); - build.mov(dword[rcx], eax); -} - } // namespace X64 } // namespace CodeGen } // namespace Luau diff --git a/CodeGen/src/EmitInstructionX64.h b/CodeGen/src/EmitInstructionX64.h index d58e1331..84fe1130 100644 --- a/CodeGen/src/EmitInstructionX64.h +++ b/CodeGen/src/EmitInstructionX64.h @@ -20,11 +20,7 @@ struct IrRegAllocX64; void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults); void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults); void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index); -void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat, Label& loopExit); -void emitinstForGLoopFallback(AssemblyBuilderX64& build, int ra, int aux, Label& 
loopRepeat); -void emitInstForGPrepXnextFallback(AssemblyBuilderX64& build, int pcpos, int ra, Label& target); -void emitInstGetImportFallback(AssemblyBuilderX64& build, int ra, uint32_t aux); -void emitInstCoverage(AssemblyBuilderX64& build, int pcpos); +void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat); } // namespace X64 } // namespace CodeGen diff --git a/CodeGen/src/Fallbacks.cpp b/CodeGen/src/Fallbacks.cpp index e84ee213..1c0dce57 100644 --- a/CodeGen/src/Fallbacks.cpp +++ b/CodeGen/src/Fallbacks.cpp @@ -416,6 +416,44 @@ const Instruction* execute_LOP_NAMECALL(lua_State* L, const Instruction* pc, Stk return pc; } +const Instruction* execute_LOP_SETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k) +{ + [[maybe_unused]] Closure* cl = clvalue(L->ci->func); + Instruction insn = *pc++; + StkId ra = VM_REG(LUAU_INSN_A(insn)); + StkId rb = &base[LUAU_INSN_B(insn)]; // note: this can point to L->top if c == LUA_MULTRET making VM_REG unsafe to use + int c = LUAU_INSN_C(insn) - 1; + uint32_t index = *pc++; + + if (c == LUA_MULTRET) + { + c = int(L->top - rb); + L->top = L->ci->top; + } + + Table* h = hvalue(ra); + + // TODO: we really don't need this anymore + if (!ttistable(ra)) + return NULL; // temporary workaround to weaken a rather powerful exploitation primitive in case of a MITM attack on bytecode + + int last = index + c - 1; + if (last > h->sizearray) + { + VM_PROTECT_PC(); // luaH_resizearray may fail due to OOM + + luaH_resizearray(L, h, last); + } + + TValue* array = h->array; + + for (int i = 0; i < c; ++i) + setobj2t(L, &array[index + i - 1], rb + i); + + luaC_barrierfast(L, h); + return pc; +} + const Instruction* execute_LOP_FORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k) { [[maybe_unused]] Closure* cl = clvalue(L->ci->func); diff --git a/CodeGen/src/Fallbacks.h b/CodeGen/src/Fallbacks.h index bfc0e2b7..0d2d218a 100644 --- a/CodeGen/src/Fallbacks.h +++ b/CodeGen/src/Fallbacks.h @@ 
-16,6 +16,7 @@ const Instruction* execute_LOP_GETTABLEKS(lua_State* L, const Instruction* pc, S const Instruction* execute_LOP_SETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k); const Instruction* execute_LOP_NEWCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k); const Instruction* execute_LOP_NAMECALL(lua_State* L, const Instruction* pc, StkId base, TValue* k); +const Instruction* execute_LOP_SETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k); const Instruction* execute_LOP_FORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k); const Instruction* execute_LOP_GETVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k); const Instruction* execute_LOP_DUPCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k); diff --git a/CodeGen/src/IrAnalysis.cpp b/CodeGen/src/IrAnalysis.cpp index 2246e5c5..f3870e96 100644 --- a/CodeGen/src/IrAnalysis.cpp +++ b/CodeGen/src/IrAnalysis.cpp @@ -354,6 +354,8 @@ static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock& case IrCmd::RETURN: useRange(vmRegOp(inst.a), function.intOp(inst.b)); break; + + // TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it case IrCmd::FASTCALL: case IrCmd::INVOKE_FASTCALL: if (int count = function.intOp(inst.e); count != -1) diff --git a/CodeGen/src/IrBuilder.cpp b/CodeGen/src/IrBuilder.cpp index 48c0e25c..d86dfe05 100644 --- a/CodeGen/src/IrBuilder.cpp +++ b/CodeGen/src/IrBuilder.cpp @@ -468,7 +468,8 @@ void IrBuilder::clone(const IrBlock& source, bool removeCurrentTerminator) IrInst clone = function.instructions[index]; // Skip pseudo instructions to make clone more compact, but validate that they have no users - if (isPseudo(clone.cmd)) + // But if substitution tracks a location, that tracking has to be preserved + if (isPseudo(clone.cmd) && !(clone.cmd == IrCmd::SUBSTITUTE && clone.b.kind != IrOpKind::None)) { 
LUAU_ASSERT(clone.useCount == 0); continue; diff --git a/CodeGen/src/IrCallWrapperX64.cpp b/CodeGen/src/IrCallWrapperX64.cpp index 8ac5f8bc..f466df4a 100644 --- a/CodeGen/src/IrCallWrapperX64.cpp +++ b/CodeGen/src/IrCallWrapperX64.cpp @@ -13,6 +13,10 @@ namespace CodeGen namespace X64 { +static const std::array kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]}; +static const std::array kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9}; +static const std::array kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV + static bool sameUnderlyingRegister(RegisterX64 a, RegisterX64 b) { SizeX64 underlyingSizeA = a.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword; @@ -37,21 +41,35 @@ void IrCallWrapperX64::addArgument(SizeX64 targetSize, OperandX64 source, IrOp s LUAU_ASSERT(instIdx != kInvalidInstIdx || sourceOp.kind == IrOpKind::None); LUAU_ASSERT(argCount < kMaxCallArguments); - args[argCount++] = {targetSize, source, sourceOp}; + CallArgument& arg = args[argCount++]; + arg = {targetSize, source, sourceOp}; + + arg.target = getNextArgumentTarget(targetSize); + + if (build.abi == ABIX64::Windows) + { + // On Windows, gpr/xmm register positions move in sync + gprPos++; + xmmPos++; + } + else + { + if (targetSize == SizeX64::xmmword) + xmmPos++; + else + gprPos++; + } } void IrCallWrapperX64::addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg) { - LUAU_ASSERT(argCount < kMaxCallArguments); - args[argCount++] = {targetSize, scopedReg.release(), {}}; + addArgument(targetSize, scopedReg.release(), {}); } void IrCallWrapperX64::call(const OperandX64& func) { funcOp = func; - assignTargetRegisters(); - countRegisterUses(); for (int i = 0; i < argCount; ++i) @@ -190,44 +208,33 @@ void IrCallWrapperX64::call(const OperandX64& func) build.call(funcOp); } -void IrCallWrapperX64::assignTargetRegisters() +RegisterX64 IrCallWrapperX64::suggestNextArgumentRegister(SizeX64 size) const { - static const 
std::array kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]}; - static const std::array kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9}; + OperandX64 target = getNextArgumentTarget(size); + + return target.cat == CategoryX64::reg ? regs.takeReg(target.base, kInvalidInstIdx) : regs.allocReg(size, kInvalidInstIdx); +} + +OperandX64 IrCallWrapperX64::getNextArgumentTarget(SizeX64 size) const +{ + if (size == SizeX64::xmmword) + { + LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size()); + return kXmmOrder[xmmPos]; + } const std::array& gprOrder = build.abi == ABIX64::Windows ? kWindowsGprOrder : kSystemvGprOrder; - static const std::array kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV - int gprPos = 0; - int xmmPos = 0; + LUAU_ASSERT(size_t(gprPos) < gprOrder.size()); + OperandX64 target = gprOrder[gprPos]; - for (int i = 0; i < argCount; i++) - { - CallArgument& arg = args[i]; + // Keep requested argument size + if (target.cat == CategoryX64::reg) + target.base.size = size; + else if (target.cat == CategoryX64::mem) + target.memSize = size; - if (arg.targetSize == SizeX64::xmmword) - { - LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size()); - arg.target = kXmmOrder[xmmPos++]; - - if (build.abi == ABIX64::Windows) - gprPos++; // On Windows, gpr/xmm register positions move in sync - } - else - { - LUAU_ASSERT(size_t(gprPos) < gprOrder.size()); - arg.target = gprOrder[gprPos++]; - - if (build.abi == ABIX64::Windows) - xmmPos++; // On Windows, gpr/xmm register positions move in sync - - // Keep requested argument size - if (arg.target.cat == CategoryX64::reg) - arg.target.base.size = arg.targetSize; - else if (arg.target.cat == CategoryX64::mem) - arg.target.memSize = arg.targetSize; - } - } + return target; } void IrCallWrapperX64::countRegisterUses() @@ -376,7 +383,7 @@ RegisterX64 IrCallWrapperX64::findConflictingTarget() const void IrCallWrapperX64::renameConflictingRegister(RegisterX64 conflict) { // Get a 
fresh register - RegisterX64 freshReg = conflict.size == SizeX64::xmmword ? regs.allocXmmReg(kInvalidInstIdx) : regs.allocGprReg(conflict.size, kInvalidInstIdx); + RegisterX64 freshReg = regs.allocReg(conflict.size, kInvalidInstIdx); if (conflict.size == SizeX64::xmmword) build.vmovsd(freshReg, conflict, conflict); diff --git a/CodeGen/src/IrLoweringA64.cpp b/CodeGen/src/IrLoweringA64.cpp index 7f0305cc..3f05d537 100644 --- a/CodeGen/src/IrLoweringA64.cpp +++ b/CodeGen/src/IrLoweringA64.cpp @@ -8,7 +8,6 @@ #include "Luau/IrUtils.h" #include "EmitCommonA64.h" -#include "EmitInstructionA64.h" #include "NativeState.h" #include "lstate.h" @@ -27,13 +26,14 @@ namespace A64 #ifdef TRACE struct LoweringStatsA64 { - size_t can; + size_t missing; size_t total; ~LoweringStatsA64() { if (total) - printf("A64 lowering succeeded for %.1f%% functions (%d/%d)\n", double(can) / double(total) * 100, int(can), int(total)); + printf("A64 lowering succeeded for %.1f%% functions (%d/%d)\n", double(total - missing) / double(total) * 100, int(total - missing), + int(total)); } } gStatsA64; #endif @@ -78,32 +78,230 @@ inline ConditionA64 getConditionFP(IrCondition cond) } } -// TODO: instead of temp1/temp2 we can take a register that we will use for ra->value; that way callers to this function will be able to use it when -// calling luaC_barrier* -static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp1, RegisterA64 temp2, int ra, Label& skip) +static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp, int ra, Label& skip) { - RegisterA64 temp1w = castReg(KindA64::w, temp1); - RegisterA64 temp2w = castReg(KindA64::w, temp2); + RegisterA64 tempw = castReg(KindA64::w, temp); // iscollectable(ra) - build.ldr(temp1w, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, tt))); - build.cmp(temp1w, LUA_TSTRING); + build.ldr(tempw, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, tt))); + 
build.cmp(tempw, LUA_TSTRING); build.b(ConditionA64::Less, skip); // isblack(obj2gco(o)) // TODO: conditional bit test with BLACKBIT - build.ldrb(temp1w, mem(object, offsetof(GCheader, marked))); - build.mov(temp2w, bitmask(BLACKBIT)); - build.and_(temp1w, temp1w, temp2w); - build.cbz(temp1w, skip); + build.ldrb(tempw, mem(object, offsetof(GCheader, marked))); + build.tst(tempw, bitmask(BLACKBIT)); + build.b(ConditionA64::Equal, skip); // Equal = Zero after tst // iswhite(gcvalue(ra)) - // TODO: tst with bitmask(WHITE0BIT, WHITE1BIT) - build.ldr(temp1, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, value))); - build.ldrb(temp1w, mem(temp1, offsetof(GCheader, marked))); - build.mov(temp2w, bit2mask(WHITE0BIT, WHITE1BIT)); - build.and_(temp1w, temp1w, temp2w); - build.cbz(temp1w, skip); + build.ldr(temp, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, value))); + build.ldrb(tempw, mem(temp, offsetof(GCheader, marked))); + build.tst(tempw, bit2mask(WHITE0BIT, WHITE1BIT)); + build.b(ConditionA64::Equal, skip); // Equal = Zero after tst +} + +static void emitAddOffset(AssemblyBuilderA64& build, RegisterA64 dst, RegisterA64 src, size_t offset) +{ + LUAU_ASSERT(dst != src); + LUAU_ASSERT(offset <= INT_MAX); + + if (offset <= AssemblyBuilderA64::kMaxImmediate) + { + build.add(dst, src, uint16_t(offset)); + } + else + { + build.mov(dst, int(offset)); + build.add(dst, dst, src); + } +} + +static void emitFallback(AssemblyBuilderA64& build, int op, int pcpos) +{ + // fallback(L, instruction, base, k) + build.mov(x0, rState); + emitAddOffset(build, x1, rCode, pcpos * sizeof(Instruction)); + build.mov(x2, rBase); + build.mov(x3, rConstants); + build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, fallback) + op * sizeof(FallbackFn))); + build.blr(x4); + + emitUpdateBase(build); +} + +static void emitInvokeLibm1(AssemblyBuilderA64& build, size_t func, int res, int arg) +{ + build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n))); + build.ldr(x0, 
mem(rNativeContext, uint32_t(func))); + build.blr(x0); + build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n))); +} + +static void emitInvokeLibm2(AssemblyBuilderA64& build, size_t func, int res, int arg, IrOp args, bool argsInt = false) +{ + if (args.kind == IrOpKind::VmReg) + build.ldr(d1, mem(rBase, args.index * sizeof(TValue) + offsetof(TValue, value.n))); + else if (args.kind == IrOpKind::VmConst) + { + size_t constantOffset = args.index * sizeof(TValue) + offsetof(TValue, value.n); + + // Note: cumulative offset is guaranteed to be divisible by 8 (since we're loading a double); we can use that to expand the useful range that + // doesn't require temporaries + if (constantOffset / 8 <= AddressA64::kMaxOffset) + { + build.ldr(d1, mem(rConstants, int(constantOffset))); + } + else + { + emitAddOffset(build, x0, rConstants, constantOffset); + build.ldr(d1, x0); + } + } + else + LUAU_ASSERT(!"Unsupported instruction form"); + + if (argsInt) + build.fcvtzs(w0, d1); + + build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n))); + build.ldr(x1, mem(rNativeContext, uint32_t(func))); + build.blr(x1); + build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n))); +} + +static void emitInvokeLibm1P(AssemblyBuilderA64& build, size_t func, int arg) +{ + build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n))); + build.add(x0, sp, sTemporary.data); // sp-relative offset + build.ldr(x1, mem(rNativeContext, uint32_t(func))); + build.blr(x1); +} + +static bool emitBuiltin(AssemblyBuilderA64& build, IrRegAllocA64& regs, int bfid, int res, int arg, IrOp args, int nparams, int nresults) +{ + switch (bfid) + { + case LBF_MATH_EXP: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_exp), res, arg); + return true; + case LBF_MATH_FMOD: + LUAU_ASSERT(nparams == 2 && nresults == 1); + emitInvokeLibm2(build, offsetof(NativeContext, libm_fmod), res, arg, args); + 
return true; + case LBF_MATH_ASIN: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_asin), res, arg); + return true; + case LBF_MATH_SIN: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_sin), res, arg); + return true; + case LBF_MATH_SINH: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_sinh), res, arg); + return true; + case LBF_MATH_ACOS: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_acos), res, arg); + return true; + case LBF_MATH_COS: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_cos), res, arg); + return true; + case LBF_MATH_COSH: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_cosh), res, arg); + return true; + case LBF_MATH_ATAN: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_atan), res, arg); + return true; + case LBF_MATH_TAN: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_tan), res, arg); + return true; + case LBF_MATH_TANH: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_tanh), res, arg); + return true; + case LBF_MATH_ATAN2: + LUAU_ASSERT(nparams == 2 && nresults == 1); + emitInvokeLibm2(build, offsetof(NativeContext, libm_atan2), res, arg, args); + return true; + case LBF_MATH_LOG10: + LUAU_ASSERT(nparams == 1 && nresults == 1); + emitInvokeLibm1(build, offsetof(NativeContext, libm_log10), res, arg); + return true; + case LBF_MATH_LOG: + LUAU_ASSERT((nparams == 1 || nparams == 2) && nresults == 1); + // TODO: IR builtin lowering assumes that the only valid 2-argument call is log2; ideally, we use a less hacky way to indicate that + if (nparams == 2) + emitInvokeLibm1(build, 
offsetof(NativeContext, libm_log2), res, arg); + else + emitInvokeLibm1(build, offsetof(NativeContext, libm_log), res, arg); + return true; + case LBF_MATH_LDEXP: + LUAU_ASSERT(nparams == 2 && nresults == 1); + emitInvokeLibm2(build, offsetof(NativeContext, libm_ldexp), res, arg, args, /* argsInt= */ true); + return true; + case LBF_MATH_FREXP: + LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2)); + emitInvokeLibm1P(build, offsetof(NativeContext, libm_frexp), arg); + build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n))); + if (nresults == 2) + { + build.ldr(w0, sTemporary); + build.scvtf(d1, w0); + build.str(d1, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n))); + } + return true; + case LBF_MATH_MODF: + LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2)); + emitInvokeLibm1P(build, offsetof(NativeContext, libm_modf), arg); + build.ldr(d1, sTemporary); + build.str(d1, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n))); + if (nresults == 2) + build.str(d0, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n))); + return true; + case LBF_MATH_SIGN: + LUAU_ASSERT(nparams == 1 && nresults == 1); + // TODO: this can be improved with fmov(constant), for now we just load from memory + build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n))); + build.fcmpz(d0); + build.adr(x0, 0.0); + build.ldr(d0, x0); + build.adr(x0, 1.0); + build.ldr(d1, x0); + build.fcsel(d0, d1, d0, getConditionFP(IrCondition::Greater)); + build.adr(x0, -1.0); + build.ldr(d1, x0); + build.fcsel(d0, d1, d0, getConditionFP(IrCondition::Less)); + build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n))); + return true; + + case LBF_TYPE: + build.ldr(w0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, tt))); + build.ldr(x1, mem(rState, offsetof(lua_State, global))); + // TODO: this can use load with shifted/extended offset + LUAU_ASSERT(sizeof(TString*) == 8); + build.add(x1, x1, 
zextReg(w0), 3); + build.ldr(x0, mem(x1, offsetof(global_State, ttname))); + build.str(x0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.gc))); + return true; + + case LBF_TYPEOF: + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(arg * sizeof(TValue))); + build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaT_objtypenamestr))); + build.blr(x2); + build.str(x0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.gc))); + return true; + + default: + LUAU_ASSERT(!"Missing A64 lowering"); + return false; + } } IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function) @@ -116,119 +314,10 @@ IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, { // In order to allocate registers during lowering, we need to know where instruction results are last used updateLastUseLocations(function); -} -// TODO: Eventually this can go away -bool IrLoweringA64::canLower(const IrFunction& function) -{ #ifdef TRACE gStatsA64.total++; #endif - - for (const IrInst& inst : function.instructions) - { - switch (inst.cmd) - { - case IrCmd::NOP: - case IrCmd::LOAD_TAG: - case IrCmd::LOAD_POINTER: - case IrCmd::LOAD_DOUBLE: - case IrCmd::LOAD_INT: - case IrCmd::LOAD_TVALUE: - case IrCmd::LOAD_NODE_VALUE_TV: - case IrCmd::LOAD_ENV: - case IrCmd::GET_ARR_ADDR: - case IrCmd::GET_SLOT_NODE_ADDR: - case IrCmd::GET_HASH_NODE_ADDR: - case IrCmd::STORE_TAG: - case IrCmd::STORE_POINTER: - case IrCmd::STORE_DOUBLE: - case IrCmd::STORE_INT: - case IrCmd::STORE_TVALUE: - case IrCmd::STORE_NODE_VALUE_TV: - case IrCmd::ADD_INT: - case IrCmd::SUB_INT: - case IrCmd::ADD_NUM: - case IrCmd::SUB_NUM: - case IrCmd::MUL_NUM: - case IrCmd::DIV_NUM: - case IrCmd::MOD_NUM: - case IrCmd::POW_NUM: - case IrCmd::MIN_NUM: - case IrCmd::MAX_NUM: - case IrCmd::UNM_NUM: - case IrCmd::FLOOR_NUM: - case IrCmd::CEIL_NUM: - case IrCmd::ROUND_NUM: - case IrCmd::SQRT_NUM: - case IrCmd::ABS_NUM: - case 
IrCmd::JUMP: - case IrCmd::JUMP_IF_TRUTHY: - case IrCmd::JUMP_IF_FALSY: - case IrCmd::JUMP_EQ_TAG: - case IrCmd::JUMP_EQ_INT: - case IrCmd::JUMP_EQ_POINTER: - case IrCmd::JUMP_CMP_NUM: - case IrCmd::JUMP_CMP_ANY: - case IrCmd::TABLE_LEN: - case IrCmd::NEW_TABLE: - case IrCmd::DUP_TABLE: - case IrCmd::TRY_NUM_TO_INDEX: - case IrCmd::INT_TO_NUM: - case IrCmd::ADJUST_STACK_TO_REG: - case IrCmd::ADJUST_STACK_TO_TOP: - case IrCmd::INVOKE_FASTCALL: - case IrCmd::CHECK_FASTCALL_RES: - case IrCmd::DO_ARITH: - case IrCmd::DO_LEN: - case IrCmd::GET_TABLE: - case IrCmd::SET_TABLE: - case IrCmd::GET_IMPORT: - case IrCmd::CONCAT: - case IrCmd::GET_UPVALUE: - case IrCmd::SET_UPVALUE: - case IrCmd::PREPARE_FORN: - case IrCmd::CHECK_TAG: - case IrCmd::CHECK_READONLY: - case IrCmd::CHECK_NO_METATABLE: - case IrCmd::CHECK_SAFE_ENV: - case IrCmd::CHECK_ARRAY_SIZE: - case IrCmd::CHECK_SLOT_MATCH: - case IrCmd::INTERRUPT: - case IrCmd::CHECK_GC: - case IrCmd::BARRIER_OBJ: - case IrCmd::BARRIER_TABLE_BACK: - case IrCmd::BARRIER_TABLE_FORWARD: - case IrCmd::SET_SAVEDPC: - case IrCmd::CLOSE_UPVALS: - case IrCmd::CAPTURE: - case IrCmd::CALL: - case IrCmd::RETURN: - case IrCmd::FALLBACK_GETGLOBAL: - case IrCmd::FALLBACK_SETGLOBAL: - case IrCmd::FALLBACK_GETTABLEKS: - case IrCmd::FALLBACK_SETTABLEKS: - case IrCmd::FALLBACK_NAMECALL: - case IrCmd::FALLBACK_PREPVARARGS: - case IrCmd::FALLBACK_GETVARARGS: - case IrCmd::FALLBACK_NEWCLOSURE: - case IrCmd::FALLBACK_DUPCLOSURE: - case IrCmd::SUBSTITUTE: - continue; - - default: -#ifdef TRACE - printf("A64 lowering missing %s\n", getCmdName(inst.cmd)); -#endif - return false; - } - } - -#ifdef TRACE - gStatsA64.can++; -#endif - - return true; } void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) @@ -245,14 +334,14 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) case IrCmd::LOAD_POINTER: { inst.regA64 = regs.allocReg(KindA64::x); - AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + 
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.gc)); build.ldr(inst.regA64, addr); break; } case IrCmd::LOAD_DOUBLE: { inst.regA64 = regs.allocReg(KindA64::d); - AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); + AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.n)); build.ldr(inst.regA64, addr); break; } @@ -287,13 +376,21 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) if (inst.b.kind == IrOpKind::Inst) { - // TODO: This is a temporary hack that reads wN register as if it was xN. This should use unsigned extension shift once we support it. - build.add(inst.regA64, inst.regA64, castReg(KindA64::x, regOp(inst.b)), kTValueSizeLog2); + build.add(inst.regA64, inst.regA64, zextReg(regOp(inst.b)), kTValueSizeLog2); } else if (inst.b.kind == IrOpKind::Constant) { - LUAU_ASSERT(size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate >> kTValueSizeLog2); // TODO: handle out of range values - build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) << kTValueSizeLog2)); + // TODO: refactor into a common helper? can't use emitAddOffset because we need a temp register + if (intOp(inst.b) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate) + { + build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) * sizeof(TValue))); + } + else + { + RegisterA64 temp = regs.allocTemp(KindA64::x); + build.mov(temp, intOp(inst.b) * sizeof(TValue)); + build.add(inst.regA64, inst.regA64, temp); + } } else LUAU_ASSERT(!"Unsupported instruction form"); @@ -314,8 +411,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) // note: this may clobber inst.a, so it's important that we don't use it after this build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node))); - // TODO: This is a temporary hack that reads wN register as if it was xN. This should use unsigned extension shift once we support it. 
- build.add(inst.regA64, inst.regA64, castReg(KindA64::x, temp2), kLuaNodeSizeLog2); + build.add(inst.regA64, inst.regA64, zextReg(temp2), kLuaNodeSizeLog2); break; } case IrCmd::GET_HASH_NODE_ADDR: @@ -324,18 +420,16 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) RegisterA64 temp1 = regs.allocTemp(KindA64::w); RegisterA64 temp2 = regs.allocTemp(KindA64::w); - // TODO: this can use bic (andnot) to do hash & ~(-1 << lsizenode) instead but we don't support it yet - build.mov(temp1, 1); + // hash & ((1 << lsizenode) - 1) == hash & ~(-1 << lsizenode) + build.mov(temp1, -1); build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, lsizenode))); build.lsl(temp1, temp1, temp2); - build.sub(temp1, temp1, 1); build.mov(temp2, uintOp(inst.b)); - build.and_(temp2, temp2, temp1); + build.bic(temp2, temp2, temp1); // note: this may clobber inst.a, so it's important that we don't use it after this build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node))); - // TODO: This is a temporary hack that reads wN register as if it was xN. This should use unsigned extension shift once we support it. 
- build.add(inst.regA64, inst.regA64, castReg(KindA64::x, temp2), kLuaNodeSizeLog2); + build.add(inst.regA64, inst.regA64, zextReg(temp2), kLuaNodeSizeLog2); break; } case IrCmd::STORE_TAG: @@ -501,6 +595,37 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.fabs(inst.regA64, temp); break; } + case IrCmd::NOT_ANY: + { + inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); + + if (inst.a.kind == IrOpKind::Constant) + { + // other cases should've been constant folded + LUAU_ASSERT(tagOp(inst.a) == LUA_TBOOLEAN); + build.eor(inst.regA64, regOp(inst.b), 1); + } + else + { + Label notbool, exit; + + // use the fact that NIL is the only value less than BOOLEAN to do two tag comparisons at once + LUAU_ASSERT(LUA_TNIL == 0 && LUA_TBOOLEAN == 1); + build.cmp(regOp(inst.a), LUA_TBOOLEAN); + build.b(ConditionA64::NotEqual, notbool); + + // boolean => invert value + build.eor(inst.regA64, regOp(inst.b), 1); + build.b(exit); + + // not boolean => result is true iff tag was nil + build.setLabel(notbool); + build.cset(inst.regA64, ConditionA64::Less); + + build.setLabel(exit); + } + break; + } case IrCmd::JUMP: jumpOrFallthrough(blockOp(inst.a), next); break; @@ -537,10 +662,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::JUMP_EQ_TAG: - if (inst.b.kind == IrOpKind::Constant) + if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant) build.cmp(regOp(inst.a), tagOp(inst.b)); - else if (inst.b.kind == IrOpKind::Inst) + else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Inst) build.cmp(regOp(inst.a), regOp(inst.b)); + else if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Inst) + build.cmp(regOp(inst.b), tagOp(inst.a)); else LUAU_ASSERT(!"Unsupported instruction form"); @@ -570,10 +697,20 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) { IrCondition cond = conditionOp(inst.c); - RegisterA64 temp1 = 
tempDouble(inst.a); - RegisterA64 temp2 = tempDouble(inst.b); + if (inst.b.kind == IrOpKind::Constant && doubleOp(inst.b) == 0.0) + { + RegisterA64 temp = tempDouble(inst.a); + + build.fcmpz(temp); + } + else + { + RegisterA64 temp1 = tempDouble(inst.a); + RegisterA64 temp2 = tempDouble(inst.b); + + build.fcmp(temp1, temp2); + } - build.fcmp(temp1, temp2); build.b(getConditionFP(cond), labelOp(inst.d)); jumpOrFallthrough(blockOp(inst.e), next); break; @@ -607,6 +744,30 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) jumpOrFallthrough(blockOp(inst.e), next); break; } + case IrCmd::JUMP_SLOT_MATCH: + { + // TODO: share code with CHECK_SLOT_MATCH + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp1w = castReg(KindA64::w, temp1); + RegisterA64 temp2 = regs.allocTemp(KindA64::x); + + build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTag)); + build.and_(temp1w, temp1w, kLuaNodeTagMask); + build.cmp(temp1w, LUA_TSTRING); + build.b(ConditionA64::NotEqual, labelOp(inst.d)); + + AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value)); + build.ldr(temp1, mem(regOp(inst.a), offsetof(LuaNode, key.value))); + build.ldr(temp2, addr); + build.cmp(temp1, temp2); + build.b(ConditionA64::NotEqual, labelOp(inst.d)); + + build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, val.tt))); + LUAU_ASSERT(LUA_TNIL == 0); + build.cbz(temp1w, labelOp(inst.d)); + jumpOrFallthrough(blockOp(inst.c), next); + break; + } case IrCmd::TABLE_LEN: { regs.assertAllFreeExcept(regOp(inst.a)); @@ -664,6 +825,32 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } break; } + case IrCmd::TRY_CALL_FASTGETTM: + { + regs.assertAllFreeExcept(regOp(inst.a)); + + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp2 = regs.allocTemp(KindA64::w); + + build.ldr(temp1, mem(regOp(inst.a), offsetof(Table, metatable))); + build.cbz(temp1, labelOp(inst.c)); // no metatable + + build.ldrb(temp2, mem(temp1, 
offsetof(Table, tmcache))); + build.tst(temp2, 1 << intOp(inst.b)); // can't use tbz/tbnz because their jump offsets are too short + build.b(ConditionA64::NotEqual, labelOp(inst.c)); // Equal = Zero after tst; tmcache caches *absence* of metamethods + + build.mov(x0, temp1); + build.mov(w1, intOp(inst.b)); + build.ldr(x2, mem(rState, offsetof(lua_State, global))); + build.ldr(x2, mem(x2, offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*))); + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaT_gettm))); + build.blr(x3); + + // TODO: we could takeReg x0 but it's unclear if we will be able to keep x0 allocatable due to aliasing concerns + inst.regA64 = regs.allocReg(KindA64::x); + build.mov(inst.regA64, x0); + break; + } case IrCmd::INT_TO_NUM: { inst.regA64 = regs.allocReg(KindA64::d); @@ -683,8 +870,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) else if (inst.b.kind == IrOpKind::Inst) { build.add(temp, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); - // TODO: This is a temporary hack that reads wN register as if it was xN. This should use unsigned extension shift once we support it. 
- build.add(temp, temp, castReg(KindA64::x, regOp(inst.b)), kTValueSizeLog2); + build.add(temp, temp, zextReg(regOp(inst.b)), kTValueSizeLog2); build.str(temp, mem(rState, offsetof(lua_State, top))); } else @@ -699,6 +885,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.str(temp, mem(rState, offsetof(lua_State, top))); break; } + case IrCmd::FASTCALL: + regs.assertAllFree(); + // TODO: emitBuiltin should be exhaustive + if (!emitBuiltin(build, regs, uintOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c), inst.d, intOp(inst.e), intOp(inst.f))) + error = true; + break; case IrCmd::INVOKE_FASTCALL: { regs.assertAllFree(); @@ -710,18 +902,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) if (inst.d.kind == IrOpKind::VmReg) build.add(x4, rBase, uint16_t(vmRegOp(inst.d) * sizeof(TValue))); else if (inst.d.kind == IrOpKind::VmConst) - { - // TODO: refactor into a common helper - if (vmConstOp(inst.d) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate) - { - build.add(x4, rConstants, uint16_t(vmConstOp(inst.d) * sizeof(TValue))); - } - else - { - build.mov(x4, vmConstOp(inst.d) * sizeof(TValue)); - build.add(x4, rConstants, x4); - } - } + emitAddOffset(build, x4, rConstants, vmConstOp(inst.d) * sizeof(TValue)); else LUAU_ASSERT(boolOp(inst.d) == false); @@ -742,7 +923,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luauF_table) + uintOp(inst.a) * sizeof(luau_FastFunction))); build.blr(x6); - // TODO: we could takeReg w0 but it's unclear if we will be able to keep x0 allocatable due to aliasing concerns + // since w0 came from a call, we need to move it so that we don't violate zextReg safety contract inst.regA64 = regs.allocReg(KindA64::w); build.mov(inst.regA64, w0); break; @@ -758,18 +939,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * 
sizeof(TValue))); if (inst.c.kind == IrOpKind::VmConst) - { - // TODO: refactor into a common helper - if (vmConstOp(inst.c) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate) - { - build.add(x3, rConstants, uint16_t(vmConstOp(inst.c) * sizeof(TValue))); - } - else - { - build.mov(x3, vmConstOp(inst.c) * sizeof(TValue)); - build.add(x3, rConstants, x3); - } - } + emitAddOffset(build, x3, rConstants, vmConstOp(inst.c) * sizeof(TValue)); else build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue))); @@ -835,7 +1005,25 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; case IrCmd::GET_IMPORT: regs.assertAllFree(); - emitInstGetImport(build, vmRegOp(inst.a), uintOp(inst.b)); + // luaV_getimport(L, cl->env, k, aux, /* propagatenil= */ false) + build.mov(x0, rState); + build.ldr(x1, mem(rClosure, offsetof(Closure, env))); + build.mov(x2, rConstants); + build.mov(w3, uintOp(inst.b)); + build.mov(w4, 0); + build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_getimport))); + build.blr(x5); + + emitUpdateBase(build); + + // setobj2s(L, ra, L->top - 1) + build.ldr(x0, mem(rState, offsetof(lua_State, top))); + build.sub(x0, x0, sizeof(TValue)); + build.ldr(q0, x0); + build.str(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue))); + + // L->top-- + build.str(x0, mem(rState, offsetof(lua_State, top))); break; case IrCmd::CONCAT: regs.assertAllFree(); @@ -877,7 +1065,6 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::x); RegisterA64 temp3 = regs.allocTemp(KindA64::q); - RegisterA64 temp4 = regs.allocTemp(KindA64::x); // UpVal* build.ldr(temp1, mem(rClosure, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc))); @@ -887,7 +1074,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.str(temp3, temp2); Label skip; - 
checkObjectBarrierConditions(build, temp1, temp2, temp4, vmRegOp(inst.b), skip); + checkObjectBarrierConditions(build, temp1, temp2, vmRegOp(inst.b), skip); build.mov(x0, rState); build.mov(x1, temp1); // TODO: aliasing hazard @@ -945,8 +1132,17 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.cmp(temp, regOp(inst.b)); else if (inst.b.kind == IrOpKind::Constant) { - LUAU_ASSERT(size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate); // TODO: handle out of range values - build.cmp(temp, uint16_t(intOp(inst.b))); + // TODO: refactor into a common helper? + if (size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate) + { + build.cmp(temp, uint16_t(intOp(inst.b))); + } + else + { + RegisterA64 temp2 = regs.allocTemp(KindA64::w); + build.mov(temp2, intOp(inst.b)); + build.cmp(temp, temp2); + } } else LUAU_ASSERT(!"Unsupported instruction form"); @@ -959,12 +1155,9 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp1w = castReg(KindA64::w, temp1); RegisterA64 temp2 = regs.allocTemp(KindA64::x); - RegisterA64 temp2w = castReg(KindA64::w, temp2); - build.ldr(temp1w, mem(regOp(inst.a), kOffsetOfLuaNodeTag)); - // TODO: this needs bitfield extraction, or and-immediate - build.mov(temp2w, kLuaNodeTagMask); - build.and_(temp1w, temp1w, temp2w); + build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTag)); + build.and_(temp1w, temp1w, kLuaNodeTagMask); build.cmp(temp1w, LUA_TSTRING); build.b(ConditionA64::NotEqual, labelOp(inst.c)); @@ -979,6 +1172,15 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.cbz(temp1w, labelOp(inst.c)); break; } + case IrCmd::CHECK_NODE_NO_NEXT: + { + RegisterA64 temp = regs.allocTemp(KindA64::w); + + build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyNext)); + build.and_(temp, temp, ~((1u << kNextBitOffset) - 1)); // TODO: this would be 
cleaner with a right shift + build.cbnz(temp, labelOp(inst.b)); + break; + } case IrCmd::INTERRUPT: { unsigned int pcpos = uintOp(inst.a); @@ -1023,11 +1225,10 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) { regs.assertAllFreeExcept(regOp(inst.a)); - Label skip; - RegisterA64 temp1 = regs.allocTemp(KindA64::x); - RegisterA64 temp2 = regs.allocTemp(KindA64::x); + RegisterA64 temp = regs.allocTemp(KindA64::x); - checkObjectBarrierConditions(build, regOp(inst.a), temp1, temp2, vmRegOp(inst.b), skip); + Label skip; + checkObjectBarrierConditions(build, regOp(inst.a), temp, vmRegOp(inst.b), skip); build.mov(x0, rState); build.mov(x1, regOp(inst.a)); // TODO: aliasing hazard @@ -1044,15 +1245,13 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) regs.assertAllFreeExcept(regOp(inst.a)); Label skip; - RegisterA64 temp1 = regs.allocTemp(KindA64::w); - RegisterA64 temp2 = regs.allocTemp(KindA64::w); + RegisterA64 temp = regs.allocTemp(KindA64::w); // isblack(obj2gco(t)) - build.ldrb(temp1, mem(regOp(inst.a), offsetof(GCheader, marked))); + build.ldrb(temp, mem(regOp(inst.a), offsetof(GCheader, marked))); // TODO: conditional bit test with BLACKBIT - build.mov(temp2, bitmask(BLACKBIT)); - build.and_(temp1, temp1, temp2); - build.cbz(temp1, skip); + build.tst(temp, bitmask(BLACKBIT)); + build.b(ConditionA64::Equal, skip); // Equal = Zero after tst build.mov(x0, rState); build.mov(x1, regOp(inst.a)); // TODO: aliasing hazard here and below @@ -1068,11 +1267,10 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) { regs.assertAllFreeExcept(regOp(inst.a)); - Label skip; - RegisterA64 temp1 = regs.allocTemp(KindA64::x); - RegisterA64 temp2 = regs.allocTemp(KindA64::x); + RegisterA64 temp = regs.allocTemp(KindA64::x); - checkObjectBarrierConditions(build, regOp(inst.a), temp1, temp2, vmRegOp(inst.b), skip); + Label skip; + checkObjectBarrierConditions(build, regOp(inst.a), temp, vmRegOp(inst.b), 
skip); build.mov(x0, rState); build.mov(x1, regOp(inst.a)); // TODO: aliasing hazard @@ -1086,21 +1284,10 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::SET_SAVEDPC: { - unsigned int pcpos = uintOp(inst.a); RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::x); - // TODO: refactor into a common helper - if (pcpos * sizeof(Instruction) <= AssemblyBuilderA64::kMaxImmediate) - { - build.add(temp1, rCode, uint16_t(pcpos * sizeof(Instruction))); - } - else - { - build.mov(temp1, pcpos * sizeof(Instruction)); - build.add(temp1, rCode, temp1); - } - + emitAddOffset(build, temp1, rCode, uintOp(inst.a) * sizeof(Instruction)); build.ldr(temp2, mem(rState, offsetof(lua_State, ci))); build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc))); break; @@ -1133,14 +1320,100 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) case IrCmd::CAPTURE: // no-op break; + case IrCmd::SETLIST: + regs.assertAllFree(); + emitFallback(build, LOP_SETLIST, uintOp(inst.a)); + break; case IrCmd::CALL: regs.assertAllFree(); - emitInstCall(build, helpers, vmRegOp(inst.a), intOp(inst.b), intOp(inst.c)); + // argtop = (nparams == LUA_MULTRET) ? 
L->top : ra + 1 + nparams; + if (intOp(inst.b) == LUA_MULTRET) + build.ldr(x2, mem(rState, offsetof(lua_State, top))); + else + build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + 1 + intOp(inst.b)) * sizeof(TValue))); + + // callFallback(L, ra, argtop, nresults) + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); + build.mov(w3, intOp(inst.c)); + build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback))); + build.blr(x4); + + // reentry with x0=closure (NULL will trigger exit) + build.b(helpers.reentry); break; case IrCmd::RETURN: regs.assertAllFree(); - emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b)); + // callFallback(L, ra, n) + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); + build.mov(w2, intOp(inst.b)); + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, returnFallback))); + build.blr(x3); + + // reentry with x0=closure (NULL will trigger exit) + build.b(helpers.reentry); break; + case IrCmd::FORGLOOP: + // register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. 
ra + aux = iteration variables + regs.assertAllFree(); + // clear extra variables since we might have more than two + if (intOp(inst.b) > 2) + { + build.mov(w0, LUA_TNIL); + for (int i = 2; i < intOp(inst.b); ++i) + build.str(w0, mem(rBase, (vmRegOp(inst.a) + 3 + i) * sizeof(TValue) + offsetof(TValue, tt))); + } + // we use full iter fallback for now; in the future it could be worthwhile to accelerate array iteration here + build.mov(x0, rState); + build.ldr(x1, mem(rBase, (vmRegOp(inst.a) + 1) * sizeof(TValue) + offsetof(TValue, value.gc))); + build.ldr(w2, mem(rBase, (vmRegOp(inst.a) + 2) * sizeof(TValue) + offsetof(TValue, value.p))); + build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); + build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, forgLoopTableIter))); + build.blr(x4); + // note: no emitUpdateBase necessary because forgLoopTableIter does not reallocate stack + build.cbnz(w0, labelOp(inst.c)); + jumpOrFallthrough(blockOp(inst.d), next); + break; + case IrCmd::FORGLOOP_FALLBACK: + regs.assertAllFree(); + build.mov(x0, rState); + build.mov(w1, vmRegOp(inst.a)); + build.mov(w2, intOp(inst.b)); + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgLoopNonTableFallback))); + build.blr(x3); + emitUpdateBase(build); + build.cbnz(w0, labelOp(inst.c)); + jumpOrFallthrough(blockOp(inst.d), next); + break; + case IrCmd::FORGPREP_XNEXT_FALLBACK: + regs.assertAllFree(); + build.mov(x0, rState); + build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); + build.mov(w2, uintOp(inst.a) + 1); + build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgPrepXnextFallback))); + build.blr(x3); + // note: no emitUpdateBase necessary because forgLoopNonTableFallback does not reallocate stack + jumpOrFallthrough(blockOp(inst.c), next); + break; + case IrCmd::COVERAGE: + { + RegisterA64 temp1 = regs.allocTemp(KindA64::x); + RegisterA64 temp2 = regs.allocTemp(KindA64::w); + RegisterA64 temp3 = regs.allocTemp(KindA64::w); + + 
build.mov(temp1, uintOp(inst.a) * sizeof(Instruction)); + build.ldr(temp2, mem(rCode, temp1)); + + // increments E (high 24 bits); if the result overflows a 23-bit counter, high bit becomes 1 + // note: cmp can be eliminated with adds but we aren't concerned with code size for coverage + build.add(temp3, temp2, 256); + build.cmp(temp3, 0); + build.csel(temp2, temp2, temp3, ConditionA64::Less); + + build.str(temp2, mem(rCode, temp1)); + break; + } // Full instruction fallbacks case IrCmd::FALLBACK_GETGLOBAL: @@ -1208,9 +1481,25 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) regs.assertAllFree(); emitFallback(build, LOP_DUPCLOSURE, uintOp(inst.a)); break; + case IrCmd::FALLBACK_FORGPREP: + regs.assertAllFree(); + emitFallback(build, LOP_FORGPREP, uintOp(inst.a)); + jumpOrFallthrough(blockOp(inst.c), next); + break; - default: - LUAU_ASSERT(!"Not supported yet"); + // Pseudo instructions + case IrCmd::NOP: + case IrCmd::SUBSTITUTE: + LUAU_ASSERT(!"Pseudo instructions should not be lowered"); + break; + + // Unsupported instructions + // Note: when adding implementations for these, please move the case: label so that implemented instructions match the order in IrData.h + case IrCmd::STORE_VECTOR: +#ifdef TRACE + gStatsA64.missing++; +#endif + error = true; break; } @@ -1220,7 +1509,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) bool IrLoweringA64::hasError() const { - return false; + return error; } bool IrLoweringA64::isFallthroughBlock(IrBlock target, IrBlock next) @@ -1287,17 +1576,7 @@ AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset) RegisterA64 temp = regs.allocTemp(KindA64::x); - // TODO: refactor into a common helper - if (constantOffset <= AssemblyBuilderA64::kMaxImmediate) - { - build.add(temp, rConstants, uint16_t(constantOffset)); - } - else - { - build.mov(temp, int(constantOffset)); - build.add(temp, rConstants, temp); - } - + emitAddOffset(build, temp, rConstants, constantOffset); 
return temp; } // If we have a register, we assume it's a pointer to TValue diff --git a/CodeGen/src/IrLoweringA64.h b/CodeGen/src/IrLoweringA64.h index b374a26a..0c9f8744 100644 --- a/CodeGen/src/IrLoweringA64.h +++ b/CodeGen/src/IrLoweringA64.h @@ -26,8 +26,6 @@ struct IrLoweringA64 { IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function); - static bool canLower(const IrFunction& function); - void lowerInst(IrInst& inst, uint32_t index, IrBlock& next); bool hasError() const; @@ -61,6 +59,8 @@ struct IrLoweringA64 IrFunction& function; IrRegAllocA64 regs; + + bool error = false; }; } // namespace A64 diff --git a/CodeGen/src/IrLoweringX64.cpp b/CodeGen/src/IrLoweringX64.cpp index f2dfdb3b..51325a37 100644 --- a/CodeGen/src/IrLoweringX64.cpp +++ b/CodeGen/src/IrLoweringX64.cpp @@ -31,6 +31,8 @@ IrLoweringX64::IrLoweringX64(AssemblyBuilderX64& build, ModuleHelpers& helpers, { // In order to allocate registers during lowering, we need to know where instruction results are last used updateLastUseLocations(function); + + build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2); } void IrLoweringX64::storeDoubleAsFloat(OperandX64 dst, IrOp src) @@ -59,7 +61,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) switch (inst.cmd) { case IrCmd::LOAD_TAG: - inst.regX64 = regs.allocGprReg(SizeX64::dword, index); + inst.regX64 = regs.allocReg(SizeX64::dword, index); if (inst.a.kind == IrOpKind::VmReg) build.mov(inst.regX64, luauRegTag(vmRegOp(inst.a))); @@ -73,7 +75,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(!"Unsupported instruction form"); break; case IrCmd::LOAD_POINTER: - inst.regX64 = regs.allocGprReg(SizeX64::qword, index); + inst.regX64 = regs.allocReg(SizeX64::qword, index); if (inst.a.kind == IrOpKind::VmReg) build.mov(inst.regX64, luauRegValue(vmRegOp(inst.a))); @@ -87,7 +89,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, 
uint32_t index, IrBlock& next) LUAU_ASSERT(!"Unsupported instruction form"); break; case IrCmd::LOAD_DOUBLE: - inst.regX64 = regs.allocXmmReg(index); + inst.regX64 = regs.allocReg(SizeX64::xmmword, index); if (inst.a.kind == IrOpKind::VmReg) build.vmovsd(inst.regX64, luauRegValue(vmRegOp(inst.a))); @@ -97,12 +99,12 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(!"Unsupported instruction form"); break; case IrCmd::LOAD_INT: - inst.regX64 = regs.allocGprReg(SizeX64::dword, index); + inst.regX64 = regs.allocReg(SizeX64::dword, index); build.mov(inst.regX64, luauRegValueInt(vmRegOp(inst.a))); break; case IrCmd::LOAD_TVALUE: - inst.regX64 = regs.allocXmmReg(index); + inst.regX64 = regs.allocReg(SizeX64::xmmword, index); if (inst.a.kind == IrOpKind::VmReg) build.vmovups(inst.regX64, luauReg(vmRegOp(inst.a))); @@ -114,12 +116,12 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) LUAU_ASSERT(!"Unsupported instruction form"); break; case IrCmd::LOAD_NODE_VALUE_TV: - inst.regX64 = regs.allocXmmReg(index); + inst.regX64 = regs.allocReg(SizeX64::xmmword, index); build.vmovups(inst.regX64, luauNodeValue(regOp(inst.a))); break; case IrCmd::LOAD_ENV: - inst.regX64 = regs.allocGprReg(SizeX64::qword, index); + inst.regX64 = regs.allocReg(SizeX64::qword, index); build.mov(inst.regX64, sClosure); build.mov(inst.regX64, qword[inst.regX64 + offsetof(Closure, env)]); @@ -127,7 +129,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) case IrCmd::GET_ARR_ADDR: if (inst.b.kind == IrOpKind::Inst) { - inst.regX64 = regs.allocGprRegOrReuse(SizeX64::qword, index, {inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {inst.b}); if (dwordReg(inst.regX64) != regOp(inst.b)) build.mov(dwordReg(inst.regX64), regOp(inst.b)); @@ -137,7 +139,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } else if (inst.b.kind == IrOpKind::Constant) { - inst.regX64 = 
regs.allocGprRegOrReuse(SizeX64::qword, index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {inst.a}); build.mov(inst.regX64, qword[regOp(inst.a) + offsetof(Table, array)]); @@ -151,7 +153,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; case IrCmd::GET_SLOT_NODE_ADDR: { - inst.regX64 = regs.allocGprReg(SizeX64::qword, index); + inst.regX64 = regs.allocReg(SizeX64::qword, index); ScopedRegX64 tmp{regs, SizeX64::qword}; @@ -160,11 +162,11 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::GET_HASH_NODE_ADDR: { - inst.regX64 = regs.allocGprReg(SizeX64::qword, index); - // Custom bit shift value can only be placed in cl ScopedRegX64 shiftTmp{regs, regs.takeReg(rcx, kInvalidInstIdx)}; + inst.regX64 = regs.allocReg(SizeX64::qword, index); + ScopedRegX64 tmp{regs, SizeX64::qword}; build.mov(inst.regX64, qword[regOp(inst.a) + offsetof(Table, node)]); @@ -232,7 +234,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.vmovups(luauNodeValue(regOp(inst.a)), regOp(inst.b)); break; case IrCmd::ADD_INT: - inst.regX64 = regs.allocGprRegOrReuse(SizeX64::dword, index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a}); if (inst.regX64 == regOp(inst.a) && intOp(inst.b) == 1) build.inc(inst.regX64); @@ -242,7 +244,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.lea(inst.regX64, addr[regOp(inst.a) + intOp(inst.b)]); break; case IrCmd::SUB_INT: - inst.regX64 = regs.allocGprRegOrReuse(SizeX64::dword, index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a}); if (inst.regX64 == regOp(inst.a) && intOp(inst.b) == 1) build.dec(inst.regX64); @@ -252,7 +254,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) build.lea(inst.regX64, addr[regOp(inst.a) - intOp(inst.b)]); break; case IrCmd::ADD_NUM: - inst.regX64 = 
regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Constant) { @@ -267,7 +269,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } break; case IrCmd::SUB_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Constant) { @@ -282,7 +284,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } break; case IrCmd::MUL_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Constant) { @@ -297,7 +299,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } break; case IrCmd::DIV_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Constant) { @@ -313,7 +315,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; case IrCmd::MOD_NUM: { - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b}); ScopedRegX64 optLhsTmp{regs}; RegisterX64 lhs; @@ -362,7 +364,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::MIN_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Constant) { @@ -377,7 +379,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } break; case IrCmd::MAX_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, 
{inst.a, inst.b}); if (inst.a.kind == IrOpKind::Constant) { @@ -393,7 +395,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; case IrCmd::UNM_NUM: { - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a}); RegisterX64 src = regOp(inst.a); @@ -410,18 +412,18 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::FLOOR_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a}); build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToNegativeInfinity); break; case IrCmd::CEIL_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a}); build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToPositiveInfinity); break; case IrCmd::ROUND_NUM: { - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a}); ScopedRegX64 tmp1{regs, SizeX64::xmmword}; ScopedRegX64 tmp2{regs, SizeX64::xmmword}; @@ -439,12 +441,12 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::SQRT_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a}); build.vsqrtsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a)); break; case IrCmd::ABS_NUM: - inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a}); if (inst.a.kind != IrOpKind::Inst) build.vmovsd(inst.regX64, memRegDoubleOp(inst.a)); @@ -456,7 +458,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) case IrCmd::NOT_ANY: { // TODO: if we have a single user which is a STORE_INT, we are missing the 
opportunity to write directly to target - inst.regX64 = regs.allocGprRegOrReuse(SizeX64::dword, index, {inst.a, inst.b}); + inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a, inst.b}); Label saveone, savezero, exit; @@ -558,7 +560,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a); callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]); - inst.regX64 = regs.allocXmmReg(index); + inst.regX64 = regs.allocReg(SizeX64::xmmword, index); build.vcvtsi2sd(inst.regX64, inst.regX64, eax); break; } @@ -566,8 +568,8 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) { IrCallWrapperX64 callWrap(regs, build, index); callWrap.addArgument(SizeX64::qword, rState); - callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.a)), inst.a); - callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b)), inst.b); + callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.a))); + callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b))); callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_new)]); inst.regX64 = regs.takeReg(rax, index); break; @@ -583,7 +585,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::TRY_NUM_TO_INDEX: { - inst.regX64 = regs.allocGprReg(SizeX64::dword, index); + inst.regX64 = regs.allocReg(SizeX64::dword, index); ScopedRegX64 tmp{regs, SizeX64::xmmword}; @@ -620,7 +622,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::INT_TO_NUM: - inst.regX64 = regs.allocXmmReg(index); + inst.regX64 = regs.allocReg(SizeX64::xmmword, index); build.vcvtsi2sd(inst.regX64, inst.regX64, regOp(inst.a)); break; @@ -688,11 +690,10 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) if (nparams == LUA_MULTRET) { - // Compute 'L->top - (ra + 1)', on SystemV, take r9 register to compute directly into the 
argument - // TODO: IrCallWrapperX64 should provide a way to 'guess' target argument register correctly - RegisterX64 reg = build.abi == ABIX64::Windows ? regs.allocGprReg(SizeX64::qword, kInvalidInstIdx) : regs.takeReg(rArg6, kInvalidInstIdx); + RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword); ScopedRegX64 tmp{regs, SizeX64::qword}; + // L->top - (ra + 1) build.mov(reg, qword[rState + offsetof(lua_State, top)]); build.lea(tmp.reg, addr[rBase + (ra + 1) * sizeof(TValue)]); build.sub(reg, tmp.reg); @@ -759,9 +760,35 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } break; case IrCmd::GET_IMPORT: - regs.assertAllFree(); - emitInstGetImportFallback(build, vmRegOp(inst.a), uintOp(inst.b)); + { + ScopedRegX64 tmp1{regs, SizeX64::qword}; + + build.mov(tmp1.reg, sClosure); + + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, qword[tmp1.release() + offsetof(Closure, env)]); + callWrap.addArgument(SizeX64::qword, rConstants); + callWrap.addArgument(SizeX64::dword, uintOp(inst.b)); + callWrap.addArgument(SizeX64::dword, 0); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_getimport)]); + + emitUpdateBase(build); + + ScopedRegX64 tmp2{regs, SizeX64::qword}; + + // setobj2s(L, ra, L->top - 1) + build.mov(tmp2.reg, qword[rState + offsetof(lua_State, top)]); + build.sub(tmp2.reg, sizeof(TValue)); + + ScopedRegX64 tmp3{regs, SizeX64::xmmword}; + build.vmovups(tmp3.reg, xmmword[tmp2.reg]); + build.vmovups(luauReg(vmRegOp(inst.a)), tmp3.reg); + + // L->top-- + build.mov(qword[rState + offsetof(lua_State, top)], tmp2.reg); break; + } case IrCmd::CONCAT: { IrCallWrapperX64 callWrap(regs, build, index); @@ -783,7 +810,6 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) // uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value Label skip; - // TODO: jumpIfTagIsNot 
can be generalized to take OperandX64 and then we can use it here; let's wait until we see this more though build.cmp(dword[tmp1.reg + offsetof(TValue, tt)], LUA_TUPVAL); build.jcc(ConditionX64::NotEqual, skip); @@ -822,36 +848,25 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) callPrepareForN(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c)); break; case IrCmd::CHECK_TAG: - if (inst.a.kind == IrOpKind::Inst) - { - build.cmp(regOp(inst.a), tagOp(inst.b)); - build.jcc(ConditionX64::NotEqual, labelOp(inst.c)); - } - else if (inst.a.kind == IrOpKind::VmReg) - { - jumpIfTagIsNot(build, vmRegOp(inst.a), lua_Type(tagOp(inst.b)), labelOp(inst.c)); - } - else if (inst.a.kind == IrOpKind::VmConst) - { - build.cmp(luauConstantTag(vmConstOp(inst.a)), tagOp(inst.b)); - build.jcc(ConditionX64::NotEqual, labelOp(inst.c)); - } - else - { - LUAU_ASSERT(!"Unsupported instruction form"); - } + build.cmp(memRegTagOp(inst.a), tagOp(inst.b)); + build.jcc(ConditionX64::NotEqual, labelOp(inst.c)); break; case IrCmd::CHECK_READONLY: - jumpIfTableIsReadOnly(build, regOp(inst.a), labelOp(inst.b)); + build.cmp(byte[regOp(inst.a) + offsetof(Table, readonly)], 0); + build.jcc(ConditionX64::NotEqual, labelOp(inst.b)); break; case IrCmd::CHECK_NO_METATABLE: - jumpIfMetatablePresent(build, regOp(inst.a), labelOp(inst.b)); + build.cmp(qword[regOp(inst.a) + offsetof(Table, metatable)], 0); + build.jcc(ConditionX64::NotEqual, labelOp(inst.b)); break; case IrCmd::CHECK_SAFE_ENV: { ScopedRegX64 tmp{regs, SizeX64::qword}; - jumpIfUnsafeEnv(build, tmp.reg, labelOp(inst.a)); + build.mov(tmp.reg, sClosure); + build.mov(tmp.reg, qword[tmp.reg + offsetof(Closure, env)]); + build.cmp(byte[tmp.reg + offsetof(Table, safeenv)], 0); + build.jcc(ConditionX64::Equal, labelOp(inst.a)); break; } case IrCmd::CHECK_ARRAY_SIZE: @@ -872,11 +887,16 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::CHECK_NODE_NO_NEXT: - 
jumpIfNodeHasNext(build, regOp(inst.a), labelOp(inst.b)); + { + ScopedRegX64 tmp{regs, SizeX64::dword}; + + build.mov(tmp.reg, dword[regOp(inst.a) + offsetof(LuaNode, key) + kOffsetOfTKeyNext]); + build.shr(tmp.reg, kNextBitOffset); + build.jcc(ConditionX64::NotZero, labelOp(inst.b)); break; + } case IrCmd::INTERRUPT: - regs.assertAllFree(); - emitInterrupt(build, uintOp(inst.a)); + emitInterrupt(regs, build, uintOp(inst.a)); break; case IrCmd::CHECK_GC: callStepGc(regs, build); @@ -970,94 +990,127 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; case IrCmd::FORGLOOP: regs.assertAllFree(); - emitinstForGLoop(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c), labelOp(inst.d)); + emitInstForGLoop(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c)); + jumpOrFallthrough(blockOp(inst.d), next); break; case IrCmd::FORGLOOP_FALLBACK: - regs.assertAllFree(); - emitinstForGLoopFallback(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c)); - build.jmp(labelOp(inst.d)); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::dword, vmRegOp(inst.a)); + callWrap.addArgument(SizeX64::dword, intOp(inst.b)); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNonTableFallback)]); + + emitUpdateBase(build); + + build.test(al, al); + build.jcc(ConditionX64::NotZero, labelOp(inst.c)); + jumpOrFallthrough(blockOp(inst.d), next); break; + } case IrCmd::FORGPREP_XNEXT_FALLBACK: - regs.assertAllFree(); - emitInstForGPrepXnextFallback(build, uintOp(inst.a), vmRegOp(inst.b), labelOp(inst.c)); + { + IrCallWrapperX64 callWrap(regs, build, index); + callWrap.addArgument(SizeX64::qword, rState); + callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(inst.b))); + callWrap.addArgument(SizeX64::dword, uintOp(inst.a) + 1); + callWrap.call(qword[rNativeContext + offsetof(NativeContext, forgPrepXnextFallback)]); + jumpOrFallthrough(blockOp(inst.c), 
next); break; + } case IrCmd::COVERAGE: - regs.assertAllFree(); - emitInstCoverage(build, uintOp(inst.a)); + { + ScopedRegX64 tmp1{regs, SizeX64::qword}; + ScopedRegX64 tmp2{regs, SizeX64::dword}; + ScopedRegX64 tmp3{regs, SizeX64::dword}; + + build.mov(tmp1.reg, sCode); + build.add(tmp1.reg, uintOp(inst.a) * sizeof(Instruction)); + + // hits = LUAU_INSN_E(*pc) + build.mov(tmp2.reg, dword[tmp1.reg]); + build.sar(tmp2.reg, 8); + + // hits = (hits < (1 << 23) - 1) ? hits + 1 : hits; + build.xor_(tmp3.reg, tmp3.reg); + build.cmp(tmp2.reg, (1 << 23) - 1); + build.setcc(ConditionX64::NotEqual, byteReg(tmp3.reg)); + build.add(tmp2.reg, tmp3.reg); + + // VM_PATCH_E(pc, hits); + build.sal(tmp2.reg, 8); + build.movzx(tmp3.reg, byte[tmp1.reg]); + build.or_(tmp3.reg, tmp2.reg); + build.mov(dword[tmp1.reg], tmp3.reg); break; + } // Full instruction fallbacks case IrCmd::FALLBACK_GETGLOBAL: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); - regs.assertAllFree(); - emitFallback(build, data, LOP_GETGLOBAL, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_GETGLOBAL, uintOp(inst.a)); break; case IrCmd::FALLBACK_SETGLOBAL: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); - regs.assertAllFree(); - emitFallback(build, data, LOP_SETGLOBAL, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_SETGLOBAL, uintOp(inst.a)); break; case IrCmd::FALLBACK_GETTABLEKS: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); - regs.assertAllFree(); - emitFallback(build, data, LOP_GETTABLEKS, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_GETTABLEKS, uintOp(inst.a)); break; case IrCmd::FALLBACK_SETTABLEKS: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); - regs.assertAllFree(); - emitFallback(build, data, LOP_SETTABLEKS, 
uintOp(inst.a)); + emitFallback(regs, build, data, LOP_SETTABLEKS, uintOp(inst.a)); break; case IrCmd::FALLBACK_NAMECALL: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst); - regs.assertAllFree(); - emitFallback(build, data, LOP_NAMECALL, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_NAMECALL, uintOp(inst.a)); break; case IrCmd::FALLBACK_PREPVARARGS: LUAU_ASSERT(inst.b.kind == IrOpKind::Constant); - regs.assertAllFree(); - emitFallback(build, data, LOP_PREPVARARGS, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_PREPVARARGS, uintOp(inst.a)); break; case IrCmd::FALLBACK_GETVARARGS: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::Constant); - regs.assertAllFree(); - emitFallback(build, data, LOP_GETVARARGS, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_GETVARARGS, uintOp(inst.a)); break; case IrCmd::FALLBACK_NEWCLOSURE: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::Constant); - regs.assertAllFree(); - emitFallback(build, data, LOP_NEWCLOSURE, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_NEWCLOSURE, uintOp(inst.a)); break; case IrCmd::FALLBACK_DUPCLOSURE: LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg); LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst); - regs.assertAllFree(); - emitFallback(build, data, LOP_DUPCLOSURE, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_DUPCLOSURE, uintOp(inst.a)); break; case IrCmd::FALLBACK_FORGPREP: - regs.assertAllFree(); - emitFallback(build, data, LOP_FORGPREP, uintOp(inst.a)); + emitFallback(regs, build, data, LOP_FORGPREP, uintOp(inst.a)); + jumpOrFallthrough(blockOp(inst.c), next); break; - default: - LUAU_ASSERT(!"Not supported yet"); + + // Pseudo instructions + case IrCmd::NOP: + case IrCmd::SUBSTITUTE: + LUAU_ASSERT(!"Pseudo instructions should not be lowered"); break; } diff --git a/CodeGen/src/IrRegAllocA64.cpp 
b/CodeGen/src/IrRegAllocA64.cpp index c6db9e9e..9a06cf69 100644 --- a/CodeGen/src/IrRegAllocA64.cpp +++ b/CodeGen/src/IrRegAllocA64.cpp @@ -1,9 +1,7 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details #include "IrRegAllocA64.h" -#ifdef _MSC_VER -#include -#endif +#include "BitUtils.h" namespace Luau { @@ -12,19 +10,6 @@ namespace CodeGen namespace A64 { -inline int setBit(uint32_t n) -{ - LUAU_ASSERT(n); - -#ifdef _MSC_VER - unsigned long rl; - _BitScanReverse(&rl, n); - return int(rl); -#else - return 31 - __builtin_clz(n); -#endif -} - IrRegAllocA64::IrRegAllocA64(IrFunction& function, std::initializer_list> regs) : function(function) { @@ -52,7 +37,7 @@ RegisterA64 IrRegAllocA64::allocReg(KindA64 kind) return noreg; } - int index = setBit(set.free); + int index = 31 - countlz(set.free); set.free &= ~(1u << index); return RegisterA64{kind, uint8_t(index)}; @@ -68,7 +53,7 @@ RegisterA64 IrRegAllocA64::allocTemp(KindA64 kind) return noreg; } - int index = setBit(set.free); + int index = 31 - countlz(set.free); set.free &= ~(1u << index); set.temp |= 1u << index; diff --git a/CodeGen/src/IrRegAllocX64.cpp b/CodeGen/src/IrRegAllocX64.cpp index dc9e7f90..24d8f51a 100644 --- a/CodeGen/src/IrRegAllocX64.cpp +++ b/CodeGen/src/IrRegAllocX64.cpp @@ -1,6 +1,8 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details #include "Luau/IrRegAllocX64.h" +#include "Luau/IrUtils.h" + #include "EmitCommonX64.h" namespace Luau @@ -12,11 +14,6 @@ namespace X64 static const RegisterX64 kGprAllocOrder[] = {rax, rdx, rcx, rbx, rsi, rdi, r8, r9, r10, r11}; -static bool isFullTvalueOperand(IrCmd cmd) -{ - return cmd == IrCmd::LOAD_TVALUE || cmd == IrCmd::LOAD_NODE_VALUE_TV; -} - IrRegAllocX64::IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function) : build(build) , function(function) @@ -27,50 +24,43 @@ IrRegAllocX64::IrRegAllocX64(AssemblyBuilderX64& 
build, IrFunction& function) xmmInstUsers.fill(kInvalidInstIdx); } -RegisterX64 IrRegAllocX64::allocGprReg(SizeX64 preferredSize, uint32_t instIdx) +RegisterX64 IrRegAllocX64::allocReg(SizeX64 size, uint32_t instIdx) { - LUAU_ASSERT( - preferredSize == SizeX64::byte || preferredSize == SizeX64::word || preferredSize == SizeX64::dword || preferredSize == SizeX64::qword); - - for (RegisterX64 reg : kGprAllocOrder) + if (size == SizeX64::xmmword) { - if (freeGprMap[reg.index]) + for (size_t i = 0; i < freeXmmMap.size(); ++i) { - freeGprMap[reg.index] = false; - gprInstUsers[reg.index] = instIdx; - return RegisterX64{preferredSize, reg.index}; + if (freeXmmMap[i]) + { + freeXmmMap[i] = false; + xmmInstUsers[i] = instIdx; + return RegisterX64{size, uint8_t(i)}; + } } } - - // If possible, spill the value with the furthest next use - if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(gprInstUsers); furthestUseTarget != kInvalidInstIdx) - return takeReg(function.instructions[furthestUseTarget].regX64, instIdx); - - LUAU_ASSERT(!"Out of GPR registers to allocate"); - return noreg; -} - -RegisterX64 IrRegAllocX64::allocXmmReg(uint32_t instIdx) -{ - for (size_t i = 0; i < freeXmmMap.size(); ++i) + else { - if (freeXmmMap[i]) + for (RegisterX64 reg : kGprAllocOrder) { - freeXmmMap[i] = false; - xmmInstUsers[i] = instIdx; - return RegisterX64{SizeX64::xmmword, uint8_t(i)}; + if (freeGprMap[reg.index]) + { + freeGprMap[reg.index] = false; + gprInstUsers[reg.index] = instIdx; + return RegisterX64{size, reg.index}; + } } } // Out of registers, spill the value with the furthest next use - if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(xmmInstUsers); furthestUseTarget != kInvalidInstIdx) + const std::array& regInstUsers = size == SizeX64::xmmword ? 
xmmInstUsers : gprInstUsers; + if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(regInstUsers); furthestUseTarget != kInvalidInstIdx) return takeReg(function.instructions[furthestUseTarget].regX64, instIdx); - LUAU_ASSERT(!"Out of XMM registers to allocate"); + LUAU_ASSERT(!"Out of registers to allocate"); return noreg; } -RegisterX64 IrRegAllocX64::allocGprRegOrReuse(SizeX64 preferredSize, uint32_t instIdx, std::initializer_list oprefs) +RegisterX64 IrRegAllocX64::allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list oprefs) { for (IrOp op : oprefs) { @@ -81,39 +71,24 @@ RegisterX64 IrRegAllocX64::allocGprRegOrReuse(SizeX64 preferredSize, uint32_t in if (source.lastUse == instIdx && !source.reusedReg && !source.spilled) { - LUAU_ASSERT(source.regX64.size != SizeX64::xmmword); + // Not comparing size directly because we only need matching register set + if ((size == SizeX64::xmmword) != (source.regX64.size == SizeX64::xmmword)) + continue; + LUAU_ASSERT(source.regX64 != noreg); source.reusedReg = true; - gprInstUsers[source.regX64.index] = instIdx; - return RegisterX64{preferredSize, source.regX64.index}; + + if (size == SizeX64::xmmword) + xmmInstUsers[source.regX64.index] = instIdx; + else + gprInstUsers[source.regX64.index] = instIdx; + + return RegisterX64{size, source.regX64.index}; } } - return allocGprReg(preferredSize, instIdx); -} - -RegisterX64 IrRegAllocX64::allocXmmRegOrReuse(uint32_t instIdx, std::initializer_list oprefs) -{ - for (IrOp op : oprefs) - { - if (op.kind != IrOpKind::Inst) - continue; - - IrInst& source = function.instructions[op.index]; - - if (source.lastUse == instIdx && !source.reusedReg && !source.spilled) - { - LUAU_ASSERT(source.regX64.size == SizeX64::xmmword); - LUAU_ASSERT(source.regX64 != noreg); - - source.reusedReg = true; - xmmInstUsers[source.regX64.index] = instIdx; - return source.regX64; - } - } - - return allocXmmReg(instIdx); + return allocReg(size, instIdx); } RegisterX64 
IrRegAllocX64::takeReg(RegisterX64 reg, uint32_t instIdx) @@ -197,41 +172,34 @@ bool IrRegAllocX64::isLastUseReg(const IrInst& target, uint32_t instIdx) const void IrRegAllocX64::preserve(IrInst& inst) { - bool doubleSlot = isFullTvalueOperand(inst.cmd); + IrSpillX64 spill; + spill.instIdx = function.getInstIndex(inst); + spill.valueKind = getCmdValueKind(inst.cmd); + spill.spillId = nextSpillId++; + spill.originalLoc = inst.regX64; - // Find a free stack slot. Two consecutive slots might be required for 16 byte TValues, so '- 1' is used - for (unsigned i = 0; i < unsigned(usedSpillSlots.size() - 1); ++i) + // Loads from VmReg/VmConst don't have to be spilled, they can be restored from a register later + if (!hasRestoreOp(inst)) { - if (usedSpillSlots.test(i)) - continue; + unsigned i = findSpillStackSlot(spill.valueKind); - if (doubleSlot && usedSpillSlots.test(i + 1)) - { - ++i; // No need to retest this double position - continue; - } - - if (inst.regX64.size == SizeX64::xmmword && doubleSlot) - { + if (spill.valueKind == IrValueKind::Tvalue) build.vmovups(xmmword[sSpillArea + i * 8], inst.regX64); - } - else if (inst.regX64.size == SizeX64::xmmword) - { + else if (spill.valueKind == IrValueKind::Double) build.vmovsd(qword[sSpillArea + i * 8], inst.regX64); - } + else if (spill.valueKind == IrValueKind::Pointer) + build.mov(qword[sSpillArea + i * 8], inst.regX64); + else if (spill.valueKind == IrValueKind::Tag || spill.valueKind == IrValueKind::Int) + build.mov(dword[sSpillArea + i * 8], inst.regX64); else - { - OperandX64 location = addr[sSpillArea + i * 8]; - location.memSize = inst.regX64.size; // Override memory access size - build.mov(location, inst.regX64); - } + LUAU_ASSERT(!"unsupported value kind"); usedSpillSlots.set(i); if (i + 1 > maxUsedSlot) maxUsedSlot = i + 1; - if (doubleSlot) + if (spill.valueKind == IrValueKind::Tvalue) { usedSpillSlots.set(i + 1); @@ -239,22 +207,15 @@ void IrRegAllocX64::preserve(IrInst& inst) maxUsedSlot = i + 2; } - 
IrSpillX64 spill; - spill.instIdx = function.getInstIndex(inst); - spill.useDoubleSlot = doubleSlot; spill.stackSlot = uint8_t(i); - spill.originalLoc = inst.regX64; - - spills.push_back(spill); - - freeReg(inst.regX64); - - inst.regX64 = noreg; - inst.spilled = true; - return; } - LUAU_ASSERT(!"nowhere to spill"); + spills.push_back(spill); + + freeReg(inst.regX64); + + inst.regX64 = noreg; + inst.spilled = true; } void IrRegAllocX64::restore(IrInst& inst, bool intoOriginalLocation) @@ -267,35 +228,34 @@ void IrRegAllocX64::restore(IrInst& inst, bool intoOriginalLocation) if (spill.instIdx == instIdx) { - LUAU_ASSERT(spill.stackSlot != kNoStackSlot); - RegisterX64 reg; + RegisterX64 reg = intoOriginalLocation ? takeReg(spill.originalLoc, instIdx) : allocReg(spill.originalLoc.size, instIdx); + OperandX64 restoreLocation = noreg; - if (spill.originalLoc.size == SizeX64::xmmword) + if (spill.stackSlot != kNoStackSlot) { - reg = intoOriginalLocation ? takeReg(spill.originalLoc, instIdx) : allocXmmReg(instIdx); + restoreLocation = addr[sSpillArea + spill.stackSlot * 8]; + restoreLocation.memSize = reg.size; - if (spill.useDoubleSlot) - build.vmovups(reg, xmmword[sSpillArea + spill.stackSlot * 8]); - else - build.vmovsd(reg, qword[sSpillArea + spill.stackSlot * 8]); + usedSpillSlots.set(spill.stackSlot, false); + + if (spill.valueKind == IrValueKind::Tvalue) + usedSpillSlots.set(spill.stackSlot + 1, false); } else { - reg = intoOriginalLocation ? 
takeReg(spill.originalLoc, instIdx) : allocGprReg(spill.originalLoc.size, instIdx); - - OperandX64 location = addr[sSpillArea + spill.stackSlot * 8]; - location.memSize = reg.size; // Override memory access size - build.mov(reg, location); + restoreLocation = getRestoreAddress(inst, getRestoreOp(inst)); } + if (spill.valueKind == IrValueKind::Tvalue) + build.vmovups(reg, restoreLocation); + else if (spill.valueKind == IrValueKind::Double) + build.vmovsd(reg, restoreLocation); + else + build.mov(reg, restoreLocation); + inst.regX64 = reg; inst.spilled = false; - usedSpillSlots.set(spill.stackSlot, false); - - if (spill.useDoubleSlot) - usedSpillSlots.set(spill.stackSlot + 1, false); - spills[i] = spills.back(); spills.pop_back(); return; @@ -334,6 +294,81 @@ bool IrRegAllocX64::shouldFreeGpr(RegisterX64 reg) const return false; } +unsigned IrRegAllocX64::findSpillStackSlot(IrValueKind valueKind) +{ + // Find a free stack slot. Two consecutive slots might be required for 16 byte TValues, so '- 1' is used + for (unsigned i = 0; i < unsigned(usedSpillSlots.size() - 1); ++i) + { + if (usedSpillSlots.test(i)) + continue; + + if (valueKind == IrValueKind::Tvalue && usedSpillSlots.test(i + 1)) + { + ++i; // No need to retest this double position + continue; + } + + return i; + } + + LUAU_ASSERT(!"nowhere to spill"); + return ~0u; +} + +IrOp IrRegAllocX64::getRestoreOp(const IrInst& inst) const +{ + switch (inst.cmd) + { + case IrCmd::LOAD_TAG: + case IrCmd::LOAD_POINTER: + case IrCmd::LOAD_DOUBLE: + case IrCmd::LOAD_INT: + case IrCmd::LOAD_TVALUE: + { + IrOp location = inst.a; + + // Might have an alternative location + if (IrOp alternative = function.findRestoreOp(inst); alternative.kind != IrOpKind::None) + location = alternative; + + if (location.kind == IrOpKind::VmReg || location.kind == IrOpKind::VmConst) + return location; + + break; + } + default: + break; + } + + return IrOp(); +} + +bool IrRegAllocX64::hasRestoreOp(const IrInst& inst) const +{ + return 
getRestoreOp(inst).kind != IrOpKind::None; +} + +OperandX64 IrRegAllocX64::getRestoreAddress(const IrInst& inst, IrOp restoreOp) +{ + switch (inst.cmd) + { + case IrCmd::LOAD_TAG: + return restoreOp.kind == IrOpKind::VmReg ? luauRegTag(vmRegOp(restoreOp)) : luauConstantTag(vmConstOp(restoreOp)); + case IrCmd::LOAD_POINTER: + case IrCmd::LOAD_DOUBLE: + return restoreOp.kind == IrOpKind::VmReg ? luauRegValue(vmRegOp(restoreOp)) : luauConstantValue(vmConstOp(restoreOp)); + case IrCmd::LOAD_INT: + LUAU_ASSERT(restoreOp.kind == IrOpKind::VmReg); + return luauRegValueInt(vmRegOp(restoreOp)); + case IrCmd::LOAD_TVALUE: + return restoreOp.kind == IrOpKind::VmReg ? luauReg(vmRegOp(restoreOp)) : luauConstant(vmConstOp(restoreOp)); + default: + break; + } + + return noreg; +} + uint32_t IrRegAllocX64::findInstructionWithFurthestNextUse(const std::array& regInstUsers) const { uint32_t furthestUseTarget = kInvalidInstIdx; @@ -411,11 +446,7 @@ ScopedRegX64::~ScopedRegX64() void ScopedRegX64::alloc(SizeX64 size) { LUAU_ASSERT(reg == noreg); - - if (size == SizeX64::xmmword) - reg = owner.allocXmmReg(kInvalidInstIdx); - else - reg = owner.allocGprReg(size, kInvalidInstIdx); + reg = owner.allocReg(size, kInvalidInstIdx); } void ScopedRegX64::free() @@ -435,38 +466,36 @@ RegisterX64 ScopedRegX64::release() ScopedSpills::ScopedSpills(IrRegAllocX64& owner) : owner(owner) { - snapshot = owner.spills; + startSpillId = owner.nextSpillId; } ScopedSpills::~ScopedSpills() { - // Taking a copy of current spills because we are going to potentially restore them - std::vector current = owner.spills; + unsigned endSpillId = owner.nextSpillId; - // Restore registers that were spilled inside scope protected by this object - for (IrSpillX64& curr : current) + for (size_t i = 0; i < owner.spills.size();) { - // If spill existed before current scope, it can be restored outside of it - if (!wasSpilledBefore(curr)) + IrSpillX64& spill = owner.spills[i]; + + // Restoring spills inside this scope cannot 
create new spills + LUAU_ASSERT(spill.spillId < endSpillId); + + // If spill was created inside current scope, it has to be restored + if (spill.spillId >= startSpillId) { - IrInst& inst = owner.function.instructions[curr.instIdx]; + IrInst& inst = owner.function.instructions[spill.instIdx]; owner.restore(inst, /*intoOriginalLocation*/ true); + + // Spill restore removes the spill entry, so loop is repeated at the same 'i' + } + else + { + i++; } } } -bool ScopedSpills::wasSpilledBefore(const IrSpillX64& spill) const -{ - for (const IrSpillX64& preexisting : snapshot) - { - if (spill.instIdx == preexisting.instIdx) - return true; - } - - return false; -} - } // namespace X64 } // namespace CodeGen } // namespace Luau diff --git a/CodeGen/src/IrTranslateBuiltins.cpp b/CodeGen/src/IrTranslateBuiltins.cpp index ba491564..539fcf77 100644 --- a/CodeGen/src/IrTranslateBuiltins.cpp +++ b/CodeGen/src/IrTranslateBuiltins.cpp @@ -8,6 +8,8 @@ // TODO: when nresults is less than our actual result count, we can skip computing/writing unused results +static const int kMinMaxUnrolledParams = 5; + namespace Luau { namespace CodeGen @@ -23,7 +25,7 @@ BuiltinImplResult translateBuiltinNumberToNumber( return {BuiltinImplType::None, -1}; build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); - build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); + build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1)); if (ra != arg) build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); @@ -40,7 +42,7 @@ BuiltinImplResult translateBuiltin2NumberToNumber( build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); build.loadAndCheckTag(args, LUA_TNUMBER, fallback); - build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), 
build.constInt(nresults)); + build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(2), build.constInt(1)); if (ra != arg) build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); @@ -56,12 +58,13 @@ BuiltinImplResult translateBuiltinNumberTo2Number( return {BuiltinImplType::None, -1}; build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); - build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); + build.inst( + IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(nresults == 1 ? 1 : 2)); if (ra != arg) build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); - if (nresults > 1) + if (nresults != 1) build.inst(IrCmd::STORE_TAG, build.vmReg(ra + 1), build.constTag(LUA_TNUMBER)); return {BuiltinImplType::UsesFallback, 2}; @@ -125,12 +128,33 @@ BuiltinImplResult translateBuiltinMathLog( if (nparams < 1 || nresults > 1) return {BuiltinImplType::None, -1}; - build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); + LuauBuiltinFunction fcId = bfid; + int fcParams = 1; if (nparams != 1) - build.loadAndCheckTag(args, LUA_TNUMBER, fallback); + { + if (args.kind != IrOpKind::VmConst) + return {BuiltinImplType::None, -1}; - build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); + LUAU_ASSERT(build.function.proto); + TValue protok = build.function.proto->k[vmConstOp(args)]; + + if (protok.tt != LUA_TNUMBER) + return {BuiltinImplType::None, -1}; + + // TODO: IR builtin lowering assumes that the only valid 2-argument call is log2; ideally, we use a less hacky way to indicate that + if (protok.value.n == 2.0) + fcParams = 2; + else if (protok.value.n == 10.0) + fcId = LBF_MATH_LOG10; + else + // TODO: We can precompute log(args) and divide 
by it, but that requires extra LOAD/STORE so for now just fall back as this is rare + return {BuiltinImplType::None, -1}; + } + + build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); + + build.inst(IrCmd::FASTCALL, build.constUint(fcId), build.vmReg(ra), build.vmReg(arg), args, build.constInt(fcParams), build.constInt(1)); if (ra != arg) build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); @@ -140,17 +164,26 @@ BuiltinImplResult translateBuiltinMathLog( BuiltinImplResult translateBuiltinMathMin(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback) { - // TODO: this can be extended for other number of arguments - if (nparams != 2 || nresults > 1) + if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1) return {BuiltinImplType::None, -1}; build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); build.loadAndCheckTag(args, LUA_TNUMBER, fallback); + for (int i = 3; i <= nparams; ++i) + build.loadAndCheckTag(build.vmReg(vmRegOp(args) + (i - 2)), LUA_TNUMBER, fallback); + IrOp varg1 = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(arg)); IrOp varg2 = build.inst(IrCmd::LOAD_DOUBLE, args); IrOp res = build.inst(IrCmd::MIN_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins + + for (int i = 3; i <= nparams; ++i) + { + IrOp arg = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(vmRegOp(args) + (i - 2))); + res = build.inst(IrCmd::MIN_NUM, arg, res); + } + build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res); if (ra != arg) @@ -161,17 +194,26 @@ BuiltinImplResult translateBuiltinMathMin(IrBuilder& build, int nparams, int ra, BuiltinImplResult translateBuiltinMathMax(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback) { - // TODO: this can be extended for other number of arguments - if (nparams != 2 || nresults > 1) + if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1) return {BuiltinImplType::None, -1}; 
build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback); build.loadAndCheckTag(args, LUA_TNUMBER, fallback); + for (int i = 3; i <= nparams; ++i) + build.loadAndCheckTag(build.vmReg(vmRegOp(args) + (i - 2)), LUA_TNUMBER, fallback); + IrOp varg1 = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(arg)); IrOp varg2 = build.inst(IrCmd::LOAD_DOUBLE, args); IrOp res = build.inst(IrCmd::MAX_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins + + for (int i = 3; i <= nparams; ++i) + { + IrOp arg = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(vmRegOp(args) + (i - 2))); + res = build.inst(IrCmd::MAX_NUM, arg, res); + } + build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res); if (ra != arg) @@ -254,8 +296,7 @@ BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, in if (nparams < 1 || nresults > 1) return {BuiltinImplType::None, -1}; - build.inst( - IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); + build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1)); build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING)); @@ -267,8 +308,7 @@ BuiltinImplResult translateBuiltinTypeof(IrBuilder& build, int nparams, int ra, if (nparams < 1 || nresults > 1) return {BuiltinImplType::None, -1}; - build.inst( - IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults)); + build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1)); build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING)); diff --git a/CodeGen/src/IrUtils.cpp b/CodeGen/src/IrUtils.cpp index c5e7c887..3811ca27 100644 --- a/CodeGen/src/IrUtils.cpp +++ b/CodeGen/src/IrUtils.cpp @@ -284,7 +284,7 @@ void replace(IrFunction& 
function, IrBlock& block, uint32_t instIdx, IrInst repl block.useCount--; } -void substitute(IrFunction& function, IrInst& inst, IrOp replacement) +void substitute(IrFunction& function, IrInst& inst, IrOp replacement, IrOp location) { LUAU_ASSERT(!isBlockTerminator(inst.cmd)); @@ -298,7 +298,7 @@ void substitute(IrFunction& function, IrInst& inst, IrOp replacement) removeUse(function, inst.f); inst.a = replacement; - inst.b = {}; + inst.b = location; inst.c = {}; inst.d = {}; inst.e = {}; diff --git a/CodeGen/src/NativeState.cpp b/CodeGen/src/NativeState.cpp index 52479692..cb128de9 100644 --- a/CodeGen/src/NativeState.cpp +++ b/CodeGen/src/NativeState.cpp @@ -16,7 +16,7 @@ #include #include -#define CODEGEN_SET_FALLBACK(op, flags) data.context.fallback[op] = {execute_##op, flags} +#define CODEGEN_SET_FALLBACK(op) data.context.fallback[op] = {execute_##op} namespace Luau { @@ -36,20 +36,21 @@ NativeState::~NativeState() = default; void initFallbackTable(NativeState& data) { // When fallback is completely removed, remove it from includeInsts list in lvmexecute_split.py - CODEGEN_SET_FALLBACK(LOP_NEWCLOSURE, 0); - CODEGEN_SET_FALLBACK(LOP_NAMECALL, 0); - CODEGEN_SET_FALLBACK(LOP_FORGPREP, kFallbackUpdatePc); - CODEGEN_SET_FALLBACK(LOP_GETVARARGS, 0); - CODEGEN_SET_FALLBACK(LOP_DUPCLOSURE, 0); - CODEGEN_SET_FALLBACK(LOP_PREPVARARGS, 0); - CODEGEN_SET_FALLBACK(LOP_BREAK, 0); + CODEGEN_SET_FALLBACK(LOP_NEWCLOSURE); + CODEGEN_SET_FALLBACK(LOP_NAMECALL); + CODEGEN_SET_FALLBACK(LOP_FORGPREP); + CODEGEN_SET_FALLBACK(LOP_GETVARARGS); + CODEGEN_SET_FALLBACK(LOP_DUPCLOSURE); + CODEGEN_SET_FALLBACK(LOP_PREPVARARGS); + CODEGEN_SET_FALLBACK(LOP_BREAK); + CODEGEN_SET_FALLBACK(LOP_SETLIST); // Fallbacks that are called from partial implementation of an instruction // TODO: these fallbacks should be replaced with special functions that exclude the (redundantly executed) fast path from the fallback - CODEGEN_SET_FALLBACK(LOP_GETGLOBAL, 0); - CODEGEN_SET_FALLBACK(LOP_SETGLOBAL, 0); - 
CODEGEN_SET_FALLBACK(LOP_GETTABLEKS, 0); - CODEGEN_SET_FALLBACK(LOP_SETTABLEKS, 0); + CODEGEN_SET_FALLBACK(LOP_GETGLOBAL); + CODEGEN_SET_FALLBACK(LOP_SETGLOBAL); + CODEGEN_SET_FALLBACK(LOP_GETTABLEKS); + CODEGEN_SET_FALLBACK(LOP_SETTABLEKS); } void initHelperFunctions(NativeState& data) @@ -105,6 +106,7 @@ void initHelperFunctions(NativeState& data) data.context.libm_tan = tan; data.context.libm_tanh = tanh; + data.context.forgLoopTableIter = forgLoopTableIter; data.context.forgLoopNodeIter = forgLoopNodeIter; data.context.forgLoopNonTableFallback = forgLoopNonTableFallback; data.context.forgPrepXnextFallback = forgPrepXnextFallback; diff --git a/CodeGen/src/NativeState.h b/CodeGen/src/NativeState.h index 2d97e63c..99d40890 100644 --- a/CodeGen/src/NativeState.h +++ b/CodeGen/src/NativeState.h @@ -23,15 +23,7 @@ namespace CodeGen class UnwindBuilder; -using FallbackFn = const Instruction*(lua_State* L, const Instruction* pc, StkId base, TValue* k); - -constexpr uint8_t kFallbackUpdatePc = 1 << 0; - -struct NativeFallback -{ - FallbackFn* fallback; - uint8_t flags; -}; +using FallbackFn = const Instruction* (*)(lua_State* L, const Instruction* pc, StkId base, TValue* k); struct NativeProto { @@ -96,6 +88,7 @@ struct NativeContext double (*libm_modf)(double, double*) = nullptr; // Helper functions + bool (*forgLoopTableIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr; bool (*forgLoopNodeIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr; bool (*forgLoopNonTableFallback)(lua_State* L, int insnA, int aux) = nullptr; void (*forgPrepXnextFallback)(lua_State* L, TValue* ra, int pc) = nullptr; @@ -106,7 +99,7 @@ struct NativeContext Closure* (*returnFallback)(lua_State* L, StkId ra, int n) = nullptr; // Opcode fallbacks, implemented in C - NativeFallback fallback[LOP__COUNT] = {}; + FallbackFn fallback[LOP__COUNT] = {}; // Fast call methods, implemented in C luau_FastFunction luauF_table[256] = {}; diff --git a/CodeGen/src/OptimizeConstProp.cpp 
b/CodeGen/src/OptimizeConstProp.cpp index 7157a18c..c7d3d8e9 100644 --- a/CodeGen/src/OptimizeConstProp.cpp +++ b/CodeGen/src/OptimizeConstProp.cpp @@ -502,6 +502,8 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& } } break; + + // TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it case IrCmd::FASTCALL: case IrCmd::INVOKE_FASTCALL: handleBuiltinEffects(state, LuauBuiltinFunction(function.uintOp(inst.a)), vmRegOp(inst.b), function.intOp(inst.f)); diff --git a/CodeGen/src/UnwindBuilderDwarf2.cpp b/CodeGen/src/UnwindBuilderDwarf2.cpp index 0b3134ba..b20a6b25 100644 --- a/CodeGen/src/UnwindBuilderDwarf2.cpp +++ b/CodeGen/src/UnwindBuilderDwarf2.cpp @@ -132,7 +132,7 @@ size_t UnwindBuilderDwarf2::getBeginOffset() const return beginOffset; } -void UnwindBuilderDwarf2::start() +void UnwindBuilderDwarf2::startInfo() { uint8_t* cieLength = pos; pos = writeu32(pos, 0); // Length (to be filled later) @@ -149,13 +149,23 @@ void UnwindBuilderDwarf2::start() // Optional CIE augmentation section (not present) // Call frame instructions (common for all FDEs, of which we have 1) - stackOffset = 8; // Return address was pushed by calling the function - - pos = defineCfaExpression(pos, DW_REG_RSP, stackOffset); // Define CFA to be the rsp + 8 + pos = defineCfaExpression(pos, DW_REG_RSP, 8); // Define CFA to be the rsp + 8 pos = defineSavedRegisterLocation(pos, DW_REG_RA, 8); // Define return address register (RA) to be located at CFA - 8 pos = alignPosition(cieLength, pos); writeu32(cieLength, unsigned(pos - cieLength - 4)); // Length field itself is excluded from length +} + +void UnwindBuilderDwarf2::startFunction() +{ + // End offset is filled in later and everything gets adjusted at the end + UnwindFunctionDwarf2 func; + func.beginOffset = 0; + func.endOffset = 0; + func.fdeEntryStartPos = uint32_t(pos - rawData); + unwindFunctions.push_back(func); + + stackOffset = 8; // Return 
address was pushed by calling the function fdeEntryStart = pos; // Will be written at the end pos = writeu32(pos, 0); // Length (to be filled later) @@ -198,14 +208,20 @@ void UnwindBuilderDwarf2::setupFrameReg(X64::RegisterX64 reg, int espOffset) // Cfa is based on rsp, so no additonal commands are required } -void UnwindBuilderDwarf2::finish() +void UnwindBuilderDwarf2::finishFunction(uint32_t beginOffset, uint32_t endOffset) { + unwindFunctions.back().beginOffset = beginOffset; + unwindFunctions.back().endOffset = endOffset; + LUAU_ASSERT(stackOffset % 16 == 0 && "stack has to be aligned to 16 bytes after prologue"); LUAU_ASSERT(fdeEntryStart != nullptr); pos = alignPosition(fdeEntryStart, pos); writeu32(fdeEntryStart, unsigned(pos - fdeEntryStart - 4)); // Length field itself is excluded from length +} +void UnwindBuilderDwarf2::finishInfo() +{ // Terminate section pos = writeu32(pos, 0); @@ -217,15 +233,26 @@ size_t UnwindBuilderDwarf2::getSize() const return size_t(pos - rawData); } -void UnwindBuilderDwarf2::finalize(char* target, void* funcAddress, size_t funcSize) const +size_t UnwindBuilderDwarf2::getFunctionCount() const +{ + return unwindFunctions.size(); +} + +void UnwindBuilderDwarf2::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const { memcpy(target, rawData, getSize()); - LUAU_ASSERT(fdeEntryStart != nullptr); - unsigned fdeEntryStartPos = unsigned(fdeEntryStart - rawData); + for (const UnwindFunctionDwarf2& func : unwindFunctions) + { + uint8_t* fdeEntryStart = (uint8_t*)target + func.fdeEntryStartPos; - writeu64((uint8_t*)target + fdeEntryStartPos + kFdeInitialLocationOffset, uintptr_t(funcAddress)); - writeu64((uint8_t*)target + fdeEntryStartPos + kFdeAddressRangeOffset, funcSize); + writeu64(fdeEntryStart + kFdeInitialLocationOffset, uintptr_t(funcAddress) + offset + func.beginOffset); + + if (func.endOffset == kFullBlockFuncton) + writeu64(fdeEntryStart + kFdeAddressRangeOffset, funcSize - offset); + else + 
writeu64(fdeEntryStart + kFdeAddressRangeOffset, func.endOffset - func.beginOffset); + } } } // namespace CodeGen diff --git a/CodeGen/src/UnwindBuilderWin.cpp b/CodeGen/src/UnwindBuilderWin.cpp index 21733001..5f4f16a9 100644 --- a/CodeGen/src/UnwindBuilderWin.cpp +++ b/CodeGen/src/UnwindBuilderWin.cpp @@ -21,17 +21,6 @@ namespace Luau namespace CodeGen { -// This struct matches the layout of UNWIND_INFO from ehdata.h -struct UnwindInfoWin -{ - uint8_t version : 3; - uint8_t flags : 5; - uint8_t prologsize; - uint8_t unwindcodecount; - uint8_t framereg : 4; - uint8_t frameregoff : 4; -}; - void UnwindBuilderWin::setBeginOffset(size_t beginOffset) { this->beginOffset = beginOffset; @@ -42,11 +31,28 @@ size_t UnwindBuilderWin::getBeginOffset() const return beginOffset; } -void UnwindBuilderWin::start() -{ - stackOffset = 8; // Return address was pushed by calling the function +void UnwindBuilderWin::startInfo() {} +void UnwindBuilderWin::startFunction() +{ + // End offset is filled in later and everything gets adjusted at the end + UnwindFunctionWin func; + func.beginOffset = 0; + func.endOffset = 0; + func.unwindInfoOffset = uint32_t(rawDataPos - rawData); + unwindFunctions.push_back(func); + + unwindCodes.clear(); unwindCodes.reserve(16); + + prologSize = 0; + + // rax has register index 0, which in Windows unwind info means that frame register is not used + frameReg = X64::rax; + frameRegOffset = 0; + + // Return address was pushed by calling the function + stackOffset = 8; } void UnwindBuilderWin::spill(int espOffset, X64::RegisterX64 reg) @@ -85,49 +91,89 @@ void UnwindBuilderWin::setupFrameReg(X64::RegisterX64 reg, int espOffset) unwindCodes.push_back({prologSize, UWOP_SET_FPREG, frameRegOffset}); } -void UnwindBuilderWin::finish() +void UnwindBuilderWin::finishFunction(uint32_t beginOffset, uint32_t endOffset) { + unwindFunctions.back().beginOffset = beginOffset; + unwindFunctions.back().endOffset = endOffset; + // Windows unwind code count is stored in 
uint8_t, so we can't have more LUAU_ASSERT(unwindCodes.size() < 256); LUAU_ASSERT(stackOffset % 16 == 0 && "stack has to be aligned to 16 bytes after prologue"); - size_t codeArraySize = unwindCodes.size(); - codeArraySize = (codeArraySize + 1) & ~1; // Size has to be even, but unwind code count doesn't have to - - infoSize = sizeof(UnwindInfoWin) + sizeof(UnwindCodeWin) * codeArraySize; -} - -size_t UnwindBuilderWin::getSize() const -{ - return infoSize; -} - -void UnwindBuilderWin::finalize(char* target, void* funcAddress, size_t funcSize) const -{ UnwindInfoWin info; info.version = 1; info.flags = 0; // No EH info.prologsize = prologSize; info.unwindcodecount = uint8_t(unwindCodes.size()); + + LUAU_ASSERT(frameReg.index < 16); info.framereg = frameReg.index; + + LUAU_ASSERT(frameRegOffset < 16); info.frameregoff = frameRegOffset; - memcpy(target, &info, sizeof(info)); - target += sizeof(UnwindInfoWin); + LUAU_ASSERT(rawDataPos + sizeof(info) <= rawData + kRawDataLimit); + memcpy(rawDataPos, &info, sizeof(info)); + rawDataPos += sizeof(info); if (!unwindCodes.empty()) { // Copy unwind codes in reverse order // Some unwind codes take up two array slots, but we don't use those atm - char* pos = target + sizeof(UnwindCodeWin) * (unwindCodes.size() - 1); + uint8_t* unwindCodePos = rawDataPos + sizeof(UnwindCodeWin) * (unwindCodes.size() - 1); + LUAU_ASSERT(unwindCodePos <= rawData + kRawDataLimit); for (size_t i = 0; i < unwindCodes.size(); i++) { - memcpy(pos, &unwindCodes[i], sizeof(UnwindCodeWin)); - pos -= sizeof(UnwindCodeWin); + memcpy(unwindCodePos, &unwindCodes[i], sizeof(UnwindCodeWin)); + unwindCodePos -= sizeof(UnwindCodeWin); } } + + rawDataPos += sizeof(UnwindCodeWin) * unwindCodes.size(); + + // Size has to be even, but unwind code count doesn't have to + if (unwindCodes.size() % 2 != 0) + rawDataPos += sizeof(UnwindCodeWin); + + LUAU_ASSERT(rawDataPos <= rawData + kRawDataLimit); +} + +void UnwindBuilderWin::finishInfo() {} + +size_t 
UnwindBuilderWin::getSize() const +{ + return sizeof(UnwindFunctionWin) * unwindFunctions.size() + size_t(rawDataPos - rawData); +} + +size_t UnwindBuilderWin::getFunctionCount() const +{ + return unwindFunctions.size(); +} + +void UnwindBuilderWin::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const +{ + // Copy adjusted function information + for (UnwindFunctionWin func : unwindFunctions) + { + // Code will start after the unwind info + func.beginOffset += uint32_t(offset); + + // Whole block is a part of a 'single function' + if (func.endOffset == kFullBlockFuncton) + func.endOffset = uint32_t(funcSize); + else + func.endOffset += uint32_t(offset); + + // Unwind data is placed right after the RUNTIME_FUNCTION data + func.unwindInfoOffset += uint32_t(sizeof(UnwindFunctionWin) * unwindFunctions.size()); + memcpy(target, &func, sizeof(func)); + target += sizeof(func); + } + + // Copy unwind codes + memcpy(target, rawData, size_t(rawDataPos - rawData)); } } // namespace CodeGen diff --git a/Sources.cmake b/Sources.cmake index 3508ec39..9f54b91e 100644 --- a/Sources.cmake +++ b/Sources.cmake @@ -89,9 +89,7 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/src/CodeGenA64.cpp CodeGen/src/CodeGenX64.cpp CodeGen/src/EmitBuiltinsX64.cpp - CodeGen/src/EmitCommonA64.cpp CodeGen/src/EmitCommonX64.cpp - CodeGen/src/EmitInstructionA64.cpp CodeGen/src/EmitInstructionX64.cpp CodeGen/src/Fallbacks.cpp CodeGen/src/IrAnalysis.cpp @@ -111,6 +109,7 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/src/UnwindBuilderDwarf2.cpp CodeGen/src/UnwindBuilderWin.cpp + CodeGen/src/BitUtils.h CodeGen/src/ByteUtils.h CodeGen/src/CustomExecUtils.h CodeGen/src/CodeGenUtils.h @@ -120,7 +119,6 @@ target_sources(Luau.CodeGen PRIVATE CodeGen/src/EmitCommon.h CodeGen/src/EmitCommonA64.h CodeGen/src/EmitCommonX64.h - CodeGen/src/EmitInstructionA64.h CodeGen/src/EmitInstructionX64.h CodeGen/src/Fallbacks.h CodeGen/src/FallbacksProlog.h diff --git a/VM/src/lapi.cpp b/VM/src/lapi.cpp 
index 08d64d55..054faa7c 100644 --- a/VM/src/lapi.cpp +++ b/VM/src/lapi.cpp @@ -538,6 +538,8 @@ const void* lua_topointer(lua_State* L, int idx) StkId o = index2addr(L, idx); switch (ttype(o)) { + case LUA_TSTRING: + return tsvalue(o); case LUA_TTABLE: return hvalue(o); case LUA_TFUNCTION: diff --git a/VM/src/ltable.cpp b/VM/src/ltable.cpp index 5eceea74..c963ac8d 100644 --- a/VM/src/ltable.cpp +++ b/VM/src/ltable.cpp @@ -33,8 +33,6 @@ #include -LUAU_FASTFLAGVARIABLE(LuauArrBoundResizeFix, false) - // max size of both array and hash part is 2^MAXBITS #define MAXBITS 26 #define MAXSIZE (1 << MAXBITS) @@ -466,30 +464,22 @@ static void rehash(lua_State* L, Table* t, const TValue* ek) int na = computesizes(nums, &nasize); int nh = totaluse - na; - if (FFlag::LuauArrBoundResizeFix) + // enforce the boundary invariant; for performance, only do hash lookups if we must + int nadjusted = adjustasize(t, nasize, ek); + + // count how many extra elements belong to array part instead of hash part + int aextra = nadjusted - nasize; + + if (aextra != 0) { - // enforce the boundary invariant; for performance, only do hash lookups if we must - int nadjusted = adjustasize(t, nasize, ek); + // we no longer need to store those extra array elements in hash part + nh -= aextra; - // count how many extra elements belong to array part instead of hash part - int aextra = nadjusted - nasize; + // because hash nodes are twice as large as array nodes, the memory we saved for hash parts can be used by array part + // this follows the general sparse array part optimization where array is allocated when 50% occupation is reached + nasize = nadjusted + aextra; - if (aextra != 0) - { - // we no longer need to store those extra array elements in hash part - nh -= aextra; - - // because hash nodes are twice as large as array nodes, the memory we saved for hash parts can be used by array part - // this follows the general sparse array part optimization where array is allocated when 50% occupation is 
reached - nasize = nadjusted + aextra; - - // since the size was changed, it's again important to enforce the boundary invariant at the new size - nasize = adjustasize(t, nasize, ek); - } - } - else - { - // enforce the boundary invariant; for performance, only do hash lookups if we must + // since the size was changed, it's again important to enforce the boundary invariant at the new size nasize = adjustasize(t, nasize, ek); } diff --git a/fuzz/linter.cpp b/fuzz/linter.cpp index 854c6327..8efd4246 100644 --- a/fuzz/linter.cpp +++ b/fuzz/linter.cpp @@ -21,7 +21,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) static Luau::NullFileResolver fileResolver; static Luau::NullConfigResolver configResolver; static Luau::Frontend frontend{&fileResolver, &configResolver}; - static int once = (Luau::registerBuiltinGlobals(frontend), 1); + static int once = (Luau::registerBuiltinGlobals(frontend, frontend.globals, false), 1); (void)once; static int once2 = (Luau::freeze(frontend.globals.globalTypes), 1); (void)once2; diff --git a/fuzz/proto.cpp b/fuzz/proto.cpp index ffeb4919..9366da5e 100644 --- a/fuzz/proto.cpp +++ b/fuzz/proto.cpp @@ -97,12 +97,12 @@ lua_State* createGlobalState() return L; } -int registerTypes(Luau::TypeChecker& typeChecker, Luau::GlobalTypes& globals) +int registerTypes(Luau::Frontend& frontend, Luau::GlobalTypes& globals, bool forAutocomplete) { using namespace Luau; using std::nullopt; - Luau::registerBuiltinGlobals(typeChecker, globals); + Luau::registerBuiltinGlobals(frontend, globals, forAutocomplete); TypeArena& arena = globals.globalTypes; BuiltinTypes& builtinTypes = *globals.builtinTypes; @@ -147,10 +147,10 @@ int registerTypes(Luau::TypeChecker& typeChecker, Luau::GlobalTypes& globals) static void setupFrontend(Luau::Frontend& frontend) { - registerTypes(frontend.typeChecker, frontend.globals); + registerTypes(frontend, frontend.globals, false); Luau::freeze(frontend.globals.globalTypes); - 
registerTypes(frontend.typeCheckerForAutocomplete, frontend.globalsForAutocomplete); + registerTypes(frontend, frontend.globalsForAutocomplete, true); Luau::freeze(frontend.globalsForAutocomplete.globalTypes); frontend.iceHandler.onInternalError = [](const char* error) { diff --git a/fuzz/typeck.cpp b/fuzz/typeck.cpp index 4f8f8857..87a88271 100644 --- a/fuzz/typeck.cpp +++ b/fuzz/typeck.cpp @@ -26,7 +26,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) static Luau::NullFileResolver fileResolver; static Luau::NullConfigResolver configResolver; static Luau::Frontend frontend{&fileResolver, &configResolver}; - static int once = (Luau::registerBuiltinGlobals(frontend), 1); + static int once = (Luau::registerBuiltinGlobals(frontend, frontend.globals, false), 1); (void)once; static int once2 = (Luau::freeze(frontend.globals.globalTypes), 1); (void)once2; diff --git a/tests/AssemblyBuilderA64.test.cpp b/tests/AssemblyBuilderA64.test.cpp index 1690c748..a0df0f9b 100644 --- a/tests/AssemblyBuilderA64.test.cpp +++ b/tests/AssemblyBuilderA64.test.cpp @@ -86,6 +86,7 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Binary") SINGLE_COMPARE(add(x0, x1, x2, 7), 0x8B021C20); SINGLE_COMPARE(sub(x0, x1, x2), 0xCB020020); SINGLE_COMPARE(and_(x0, x1, x2), 0x8A020020); + SINGLE_COMPARE(bic(x0, x1, x2), 0x8A220020); SINGLE_COMPARE(orr(x0, x1, x2), 0xAA020020); SINGLE_COMPARE(eor(x0, x1, x2), 0xCA020020); SINGLE_COMPARE(lsl(x0, x1, x2), 0x9AC22020); @@ -94,6 +95,7 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Binary") SINGLE_COMPARE(asr(x0, x1, x2), 0x9AC22820); SINGLE_COMPARE(ror(x0, x1, x2), 0x9AC22C20); SINGLE_COMPARE(cmp(x0, x1), 0xEB01001F); + SINGLE_COMPARE(tst(x0, x1), 0xEA01001F); // reg, imm SINGLE_COMPARE(add(x3, x7, 78), 0x910138E3); @@ -102,6 +104,24 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Binary") SINGLE_COMPARE(cmp(w0, 42), 0x7100A81F); } +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "BinaryImm") +{ + // instructions + 
SINGLE_COMPARE(and_(w1, w2, 1), 0x12000041); + SINGLE_COMPARE(orr(w1, w2, 1), 0x32000041); + SINGLE_COMPARE(eor(w1, w2, 1), 0x52000041); + SINGLE_COMPARE(tst(w1, 1), 0x7200003f); + + // various mask forms + SINGLE_COMPARE(and_(w0, w0, 1), 0x12000000); + SINGLE_COMPARE(and_(w0, w0, 3), 0x12000400); + SINGLE_COMPARE(and_(w0, w0, 7), 0x12000800); + SINGLE_COMPARE(and_(w0, w0, 2147483647), 0x12007800); + SINGLE_COMPARE(and_(w0, w0, 6), 0x121F0400); + SINGLE_COMPARE(and_(w0, w0, 12), 0x121E0400); + SINGLE_COMPARE(and_(w0, w0, 2147483648), 0x12010000); +} + TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Loads") { // address forms @@ -359,11 +379,13 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "AddressOffsetSize") SINGLE_COMPARE(str(q0, mem(x1, 16)), 0x3D800420); } -TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "ConditionalSelect") +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Conditionals") { SINGLE_COMPARE(csel(x0, x1, x2, ConditionA64::Equal), 0x9A820020); SINGLE_COMPARE(csel(w0, w1, w2, ConditionA64::Equal), 0x1A820020); SINGLE_COMPARE(fcsel(d0, d1, d2, ConditionA64::Equal), 0x1E620C20); + + SINGLE_COMPARE(cset(x1, ConditionA64::Less), 0x9A9FA7E1); } TEST_CASE("LogTest") @@ -394,6 +416,7 @@ TEST_CASE("LogTest") build.ldr(q1, x2); build.csel(x0, x1, x2, ConditionA64::Equal); + build.cset(x0, ConditionA64::Equal); build.fcmp(d0, d1); build.fcmpz(d0); @@ -423,6 +446,7 @@ TEST_CASE("LogTest") fabs d1,d2 ldr q1,[x2] csel x0,x1,x2,eq + cset x0,eq fcmp d0,d1 fcmp d0,#0 .L1: diff --git a/tests/AssemblyBuilderX64.test.cpp b/tests/AssemblyBuilderX64.test.cpp index 054eca7b..bafb68bc 100644 --- a/tests/AssemblyBuilderX64.test.cpp +++ b/tests/AssemblyBuilderX64.test.cpp @@ -67,6 +67,9 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseBinaryInstructionForms") SINGLE_COMPARE(add(rax, 0x7f), 0x48, 0x83, 0xc0, 0x7f); SINGLE_COMPARE(add(rax, 0x80), 0x48, 0x81, 0xc0, 0x80, 0x00, 0x00, 0x00); SINGLE_COMPARE(add(r10, 0x7fffffff), 0x49, 0x81, 0xc2, 0xff, 0xff, 0xff, 0x7f); + 
SINGLE_COMPARE(add(al, 3), 0x80, 0xc0, 0x03); + SINGLE_COMPARE(add(sil, 3), 0x48, 0x80, 0xc6, 0x03); + SINGLE_COMPARE(add(r11b, 3), 0x49, 0x80, 0xc3, 0x03); // reg, [reg] SINGLE_COMPARE(add(rax, qword[rax]), 0x48, 0x03, 0x00); @@ -191,6 +194,8 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov") SINGLE_COMPARE(mov64(rcx, 0x1234567812345678ll), 0x48, 0xb9, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12); SINGLE_COMPARE(mov(ecx, 2), 0xb9, 0x02, 0x00, 0x00, 0x00); SINGLE_COMPARE(mov(cl, 2), 0xb1, 0x02); + SINGLE_COMPARE(mov(sil, 2), 0x48, 0xb6, 0x02); + SINGLE_COMPARE(mov(r9b, 2), 0x49, 0xb1, 0x02); SINGLE_COMPARE(mov(rcx, qword[rdi]), 0x48, 0x8b, 0x0f); SINGLE_COMPARE(mov(dword[rax], 0xabcd), 0xc7, 0x00, 0xcd, 0xab, 0x00, 0x00); SINGLE_COMPARE(mov(r13, 1), 0x49, 0xbd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); @@ -201,6 +206,8 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov") SINGLE_COMPARE(mov(qword[rdx], r9), 0x4c, 0x89, 0x0a); SINGLE_COMPARE(mov(byte[rsi], 0x3), 0xc6, 0x06, 0x03); SINGLE_COMPARE(mov(byte[rsi], al), 0x88, 0x06); + SINGLE_COMPARE(mov(byte[rsi], dil), 0x48, 0x88, 0x3e); + SINGLE_COMPARE(mov(byte[rsi], r10b), 0x4c, 0x88, 0x16); } TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMovExtended") @@ -229,6 +236,8 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfShift") { SINGLE_COMPARE(shl(al, 1), 0xd0, 0xe0); SINGLE_COMPARE(shl(al, cl), 0xd2, 0xe0); + SINGLE_COMPARE(shl(sil, cl), 0x48, 0xd2, 0xe6); + SINGLE_COMPARE(shl(r10b, cl), 0x49, 0xd2, 0xe2); SINGLE_COMPARE(shr(al, 4), 0xc0, 0xe8, 0x04); SINGLE_COMPARE(shr(eax, 1), 0xd1, 0xe8); SINGLE_COMPARE(sal(eax, cl), 0xd3, 0xe0); @@ -247,6 +256,7 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfLea") TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfSetcc") { SINGLE_COMPARE(setcc(ConditionX64::NotEqual, bl), 0x0f, 0x95, 0xc3); + SINGLE_COMPARE(setcc(ConditionX64::NotEqual, dil), 0x48, 0x0f, 0x95, 0xc7); SINGLE_COMPARE(setcc(ConditionX64::BelowEqual, byte[rcx]), 
0x0f, 0x96, 0x01); } diff --git a/tests/Autocomplete.test.cpp b/tests/Autocomplete.test.cpp index c79bf35e..3dc75d62 100644 --- a/tests/Autocomplete.test.cpp +++ b/tests/Autocomplete.test.cpp @@ -3473,4 +3473,34 @@ TEST_CASE_FIXTURE(ACFixture, "autocomplete_response_perf1" * doctest::timeout(0. CHECK(ac.entryMap.count("Instance")); } +TEST_CASE_FIXTURE(ACFixture, "strict_mode_force") +{ + check(R"( +--!nonstrict +local a: {x: number} = {x=1} +local b = a +local c = b.@1 + )"); + + auto ac = autocomplete('1'); + + CHECK_EQ(1, ac.entryMap.size()); + CHECK(ac.entryMap.count("x")); +} + +TEST_CASE_FIXTURE(ACFixture, "suggest_exported_types") +{ + ScopedFastFlag luauCopyExportedTypes{"LuauCopyExportedTypes", true}; + + check(R"( +export type Type = {a: number} +local a: T@1 + )"); + + auto ac = autocomplete('1'); + + CHECK(ac.entryMap.count("Type")); + CHECK_EQ(ac.context, AutocompleteContext::Type); +} + TEST_SUITE_END(); diff --git a/tests/CodeAllocator.test.cpp b/tests/CodeAllocator.test.cpp index 359f2ba1..01deddd3 100644 --- a/tests/CodeAllocator.test.cpp +++ b/tests/CodeAllocator.test.cpp @@ -135,7 +135,8 @@ TEST_CASE("WindowsUnwindCodesX64") UnwindBuilderWin unwind; - unwind.start(); + unwind.startInfo(); + unwind.startFunction(); unwind.spill(16, rdx); unwind.spill(8, rcx); unwind.save(rdi); @@ -148,14 +149,15 @@ TEST_CASE("WindowsUnwindCodesX64") unwind.save(r15); unwind.allocStack(72); unwind.setupFrameReg(rbp, 48); - unwind.finish(); + unwind.finishFunction(0x11223344, 0x55443322); + unwind.finishInfo(); std::vector data; data.resize(unwind.getSize()); - unwind.finalize(data.data(), nullptr, 0); + unwind.finalize(data.data(), 0, nullptr, 0); - std::vector expected{0x01, 0x23, 0x0a, 0x35, 0x23, 0x33, 0x1e, 0x82, 0x1a, 0xf0, 0x18, 0xe0, 0x16, 0xd0, 0x14, 0xc0, 0x12, 0x50, 0x10, - 0x30, 0x0e, 0x60, 0x0c, 0x70}; + std::vector expected{0x44, 0x33, 0x22, 0x11, 0x22, 0x33, 0x44, 0x55, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x23, 0x0a, 0x35, 0x23, 0x33, 0x1e, + 0x82, 0x1a, 
0xf0, 0x18, 0xe0, 0x16, 0xd0, 0x14, 0xc0, 0x12, 0x50, 0x10, 0x30, 0x0e, 0x60, 0x0c, 0x70}; REQUIRE(data.size() == expected.size()); CHECK(memcmp(data.data(), expected.data(), expected.size()) == 0); @@ -168,7 +170,8 @@ TEST_CASE("Dwarf2UnwindCodesX64") UnwindBuilderDwarf2 unwind; - unwind.start(); + unwind.startInfo(); + unwind.startFunction(); unwind.save(rdi); unwind.save(rsi); unwind.save(rbx); @@ -179,11 +182,12 @@ TEST_CASE("Dwarf2UnwindCodesX64") unwind.save(r15); unwind.allocStack(72); unwind.setupFrameReg(rbp, 48); - unwind.finish(); + unwind.finishFunction(0, 0); + unwind.finishInfo(); std::vector data; data.resize(unwind.getSize()); - unwind.finalize(data.data(), nullptr, 0); + unwind.finalize(data.data(), 0, nullptr, 0); std::vector expected{0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x78, 0x10, 0x0c, 0x07, 0x08, 0x05, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -211,6 +215,8 @@ constexpr X64::RegisterX64 rArg3 = X64::rdx; constexpr X64::RegisterX64 rNonVol1 = X64::r12; constexpr X64::RegisterX64 rNonVol2 = X64::rbx; +constexpr X64::RegisterX64 rNonVol3 = X64::r13; +constexpr X64::RegisterX64 rNonVol4 = X64::r14; TEST_CASE("GeneratedCodeExecutionX64") { @@ -260,7 +266,10 @@ TEST_CASE("GeneratedCodeExecutionWithThrowX64") std::unique_ptr unwind = std::make_unique(); #endif - unwind->start(); + unwind->startInfo(); + + Label functionBegin = build.setLabel(); + unwind->startFunction(); // Prologue build.push(rNonVol1); @@ -279,8 +288,6 @@ TEST_CASE("GeneratedCodeExecutionWithThrowX64") build.lea(rbp, addr[rsp + stackSize]); unwind->setupFrameReg(rbp, stackSize); - unwind->finish(); - // Body build.mov(rNonVol1, rArg1); build.mov(rNonVol2, rArg2); @@ -296,8 +303,12 @@ TEST_CASE("GeneratedCodeExecutionWithThrowX64") build.pop(rNonVol1); build.ret(); + unwind->finishFunction(build.getLabelOffset(functionBegin), ~0u); + 
build.finalize(); + unwind->finishInfo(); + size_t blockSize = 1024 * 1024; size_t maxTotalSize = 1024 * 1024; CodeAllocator allocator(blockSize, maxTotalSize); @@ -326,6 +337,152 @@ TEST_CASE("GeneratedCodeExecutionWithThrowX64") } } +TEST_CASE("GeneratedCodeExecutionMultipleFunctionsWithThrowX64") +{ + using namespace X64; + + AssemblyBuilderX64 build(/* logText= */ false); + +#if defined(_WIN32) + std::unique_ptr unwind = std::make_unique(); +#else + std::unique_ptr unwind = std::make_unique(); +#endif + + unwind->startInfo(); + + Label start1; + Label start2; + + // First function + { + build.setLabel(start1); + unwind->startFunction(); + + // Prologue + build.push(rNonVol1); + unwind->save(rNonVol1); + build.push(rNonVol2); + unwind->save(rNonVol2); + build.push(rbp); + unwind->save(rbp); + + int stackSize = 32; + int localsSize = 16; + + build.sub(rsp, stackSize + localsSize); + unwind->allocStack(stackSize + localsSize); + + build.lea(rbp, addr[rsp + stackSize]); + unwind->setupFrameReg(rbp, stackSize); + + // Body + build.mov(rNonVol1, rArg1); + build.mov(rNonVol2, rArg2); + + build.add(rNonVol1, 15); + build.mov(rArg1, rNonVol1); + build.call(rNonVol2); + + // Epilogue + build.lea(rsp, addr[rbp + localsSize]); + build.pop(rbp); + build.pop(rNonVol2); + build.pop(rNonVol1); + build.ret(); + + Label end1 = build.setLabel(); + unwind->finishFunction(build.getLabelOffset(start1), build.getLabelOffset(end1)); + } + + // Second function with different layout + { + build.setLabel(start2); + unwind->startFunction(); + + // Prologue + build.push(rNonVol1); + unwind->save(rNonVol1); + build.push(rNonVol2); + unwind->save(rNonVol2); + build.push(rNonVol3); + unwind->save(rNonVol3); + build.push(rNonVol4); + unwind->save(rNonVol4); + build.push(rbp); + unwind->save(rbp); + + int stackSize = 32; + int localsSize = 32; + + build.sub(rsp, stackSize + localsSize); + unwind->allocStack(stackSize + localsSize); + + build.lea(rbp, addr[rsp + stackSize]); + 
unwind->setupFrameReg(rbp, stackSize); + + // Body + build.mov(rNonVol3, rArg1); + build.mov(rNonVol4, rArg2); + + build.add(rNonVol3, 15); + build.mov(rArg1, rNonVol3); + build.call(rNonVol4); + + // Epilogue + build.lea(rsp, addr[rbp + localsSize]); + build.pop(rbp); + build.pop(rNonVol4); + build.pop(rNonVol3); + build.pop(rNonVol2); + build.pop(rNonVol1); + build.ret(); + + unwind->finishFunction(build.getLabelOffset(start2), ~0u); + } + + build.finalize(); + + unwind->finishInfo(); + + size_t blockSize = 1024 * 1024; + size_t maxTotalSize = 1024 * 1024; + CodeAllocator allocator(blockSize, maxTotalSize); + + allocator.context = unwind.get(); + allocator.createBlockUnwindInfo = createBlockUnwindInfo; + allocator.destroyBlockUnwindInfo = destroyBlockUnwindInfo; + + uint8_t* nativeData; + size_t sizeNativeData; + uint8_t* nativeEntry; + REQUIRE(allocator.allocate(build.data.data(), build.data.size(), build.code.data(), build.code.size(), nativeData, sizeNativeData, nativeEntry)); + REQUIRE(nativeEntry); + + using FunctionType = int64_t(int64_t, void (*)(int64_t)); + FunctionType* f1 = (FunctionType*)(nativeEntry + start1.location); + FunctionType* f2 = (FunctionType*)(nativeEntry + start2.location); + + // To simplify debugging, CHECK_THROWS_WITH_AS is not used here + try + { + f1(10, throwing); + } + catch (const std::runtime_error& error) + { + CHECK(strcmp(error.what(), "testing") == 0); + } + + try + { + f2(10, throwing); + } + catch (const std::runtime_error& error) + { + CHECK(strcmp(error.what(), "testing") == 0); + } +} + TEST_CASE("GeneratedCodeExecutionWithThrowOutsideTheGateX64") { using namespace X64; @@ -338,7 +495,10 @@ TEST_CASE("GeneratedCodeExecutionWithThrowOutsideTheGateX64") std::unique_ptr unwind = std::make_unique(); #endif - unwind->start(); + unwind->startInfo(); + + Label functionBegin = build.setLabel(); + unwind->startFunction(); // Prologue (some of these registers don't have to be saved, but we want to have a big prologue) 
build.push(r10); @@ -365,8 +525,6 @@ TEST_CASE("GeneratedCodeExecutionWithThrowOutsideTheGateX64") build.lea(rbp, addr[rsp + stackSize]); unwind->setupFrameReg(rbp, stackSize); - unwind->finish(); - size_t prologueSize = build.setLabel().location; // Body @@ -387,8 +545,12 @@ TEST_CASE("GeneratedCodeExecutionWithThrowOutsideTheGateX64") build.pop(r10); build.ret(); + unwind->finishFunction(build.getLabelOffset(functionBegin), ~0u); + build.finalize(); + unwind->finishInfo(); + size_t blockSize = 4096; // Force allocate to create a new block each time size_t maxTotalSize = 1024 * 1024; CodeAllocator allocator(blockSize, maxTotalSize); diff --git a/tests/Conformance.test.cpp b/tests/Conformance.test.cpp index 0a9d1f77..ee7066b2 100644 --- a/tests/Conformance.test.cpp +++ b/tests/Conformance.test.cpp @@ -285,8 +285,16 @@ TEST_CASE("Tables") lua_pushcfunction( L, [](lua_State* L) { - unsigned v = luaL_checkunsigned(L, 1); - lua_pushlightuserdata(L, reinterpret_cast(uintptr_t(v))); + if (lua_type(L, 1) == LUA_TNUMBER) + { + unsigned v = luaL_checkunsigned(L, 1); + lua_pushlightuserdata(L, reinterpret_cast(uintptr_t(v))); + } + else + { + const void* p = lua_topointer(L, 1); + lua_pushlightuserdata(L, const_cast(p)); + } return 1; }, "makelud"); @@ -402,21 +410,24 @@ TEST_CASE("PCall") { ScopedFastFlag sff("LuauBetterOOMHandling", true); - runConformance("pcall.lua", [](lua_State* L) { - lua_pushcfunction(L, cxxthrow, "cxxthrow"); - lua_setglobal(L, "cxxthrow"); + runConformance( + "pcall.lua", + [](lua_State* L) { + lua_pushcfunction(L, cxxthrow, "cxxthrow"); + lua_setglobal(L, "cxxthrow"); - lua_pushcfunction( - L, - [](lua_State* L) -> int { - lua_State* co = lua_tothread(L, 1); - lua_xmove(L, co, 1); - lua_resumeerror(co, L); - return 0; - }, - "resumeerror"); - lua_setglobal(L, "resumeerror"); - }, nullptr, lua_newstate(limitedRealloc, nullptr)); + lua_pushcfunction( + L, + [](lua_State* L) -> int { + lua_State* co = lua_tothread(L, 1); + lua_xmove(L, co, 1); + 
lua_resumeerror(co, L); + return 0; + }, + "resumeerror"); + lua_setglobal(L, "resumeerror"); + }, + nullptr, lua_newstate(limitedRealloc, nullptr)); } TEST_CASE("Pack") diff --git a/tests/Fixture.cpp b/tests/Fixture.cpp index aebf177c..aba2891e 100644 --- a/tests/Fixture.cpp +++ b/tests/Fixture.cpp @@ -21,6 +21,7 @@ static const char* mainModuleName = "MainModule"; LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution); +LUAU_FASTFLAG(LuauOnDemandTypecheckers); extern std::optional randomSeed; // tests/main.cpp @@ -180,9 +181,16 @@ AstStatBlock* Fixture::parse(const std::string& source, const ParseOptions& pars Luau::lint(sourceModule->root, *sourceModule->names, frontend.globals.globalScope, module.get(), sourceModule->hotcomments, {}); } + else if (!FFlag::LuauOnDemandTypecheckers) + { + ModulePtr module = frontend.typeChecker_DEPRECATED.check(*sourceModule, sourceModule->mode.value_or(Luau::Mode::Nonstrict)); + + Luau::lint(sourceModule->root, *sourceModule->names, frontend.globals.globalScope, module.get(), sourceModule->hotcomments, {}); + } else { - ModulePtr module = frontend.typeChecker.check(*sourceModule, sourceModule->mode.value_or(Luau::Mode::Nonstrict)); + TypeChecker typeChecker(frontend.globals.globalScope, &moduleResolver, builtinTypes, &frontend.iceHandler); + ModulePtr module = typeChecker.check(*sourceModule, sourceModule->mode.value_or(Luau::Mode::Nonstrict), std::nullopt); Luau::lint(sourceModule->root, *sourceModule->names, frontend.globals.globalScope, module.get(), sourceModule->hotcomments, {}); } diff --git a/tests/Module.test.cpp b/tests/Module.test.cpp index 7e61235a..3c613a1f 100644 --- a/tests/Module.test.cpp +++ b/tests/Module.test.cpp @@ -3,6 +3,7 @@ #include "Luau/Module.h" #include "Luau/Scope.h" #include "Luau/RecursionCounter.h" +#include "Luau/Parser.h" #include "Fixture.h" @@ -42,6 +43,38 @@ TEST_CASE_FIXTURE(Fixture, "is_within_comment") CHECK(!isWithinComment(*sm, Position{7, 11})); } +TEST_CASE_FIXTURE(Fixture, 
"is_within_comment_parse_result") +{ + std::string src = R"( + --!strict + local foo = {} + function foo:bar() end + + --[[ + foo: + ]] foo:bar() + + --[[]]--[[]] -- Two distinct comments that have zero characters of space between them. + )"; + + Luau::Allocator alloc; + Luau::AstNameTable names{alloc}; + Luau::ParseOptions parseOptions; + parseOptions.captureComments = true; + Luau::ParseResult parseResult = Luau::Parser::parse(src.data(), src.size(), names, alloc, parseOptions); + + CHECK_EQ(5, parseResult.commentLocations.size()); + + CHECK(isWithinComment(parseResult, Position{1, 15})); + CHECK(isWithinComment(parseResult, Position{6, 16})); + CHECK(isWithinComment(parseResult, Position{9, 13})); + CHECK(isWithinComment(parseResult, Position{9, 14})); + + CHECK(!isWithinComment(parseResult, Position{2, 15})); + CHECK(!isWithinComment(parseResult, Position{7, 10})); + CHECK(!isWithinComment(parseResult, Position{7, 11})); +} + TEST_CASE_FIXTURE(Fixture, "dont_clone_persistent_primitive") { TypeArena dest; @@ -319,6 +352,10 @@ TEST_CASE_FIXTURE(Fixture, "clone_recursion_limit") TEST_CASE_FIXTURE(Fixture, "any_persistance_does_not_leak") { + ScopedFastFlag flags[] = { + {"LuauOccursIsntAlwaysFailure", true}, + }; + fileResolver.source["Module/A"] = R"( export type A = B type B = A @@ -332,7 +369,7 @@ type B = A auto mod = frontend.moduleResolver.getModule("Module/A"); auto it = mod->exportedTypeBindings.find("A"); REQUIRE(it != mod->exportedTypeBindings.end()); - CHECK(toString(it->second.type) == "any"); + CHECK(toString(it->second.type) == "*error-type*"); } TEST_CASE_FIXTURE(BuiltinsFixture, "do_not_clone_reexports") diff --git a/tests/StringUtils.test.cpp b/tests/StringUtils.test.cpp index afef3b06..786f965e 100644 --- a/tests/StringUtils.test.cpp +++ b/tests/StringUtils.test.cpp @@ -106,4 +106,22 @@ TEST_CASE("AreWeUsingDistanceWithAdjacentTranspositionsAndNotOptimalStringAlignm CHECK_EQ(distance, 2); } +TEST_CASE("EditDistanceSupportsUnicode") +{ + // ASCII 
character + CHECK_EQ(Luau::editDistance("A block", "X block"), 1); + + // UTF-8 2 byte character + CHECK_EQ(Luau::editDistance("A block", "À block"), 2); + + // UTF-8 3 byte character + CHECK_EQ(Luau::editDistance("A block", "⪻ block"), 3); + + // UTF-8 4 byte character + CHECK_EQ(Luau::editDistance("A block", "𒋄 block"), 4); + + // UTF-8 extreme characters + CHECK_EQ(Luau::editDistance("A block", "R̴̨̢̟̚ŏ̶̳̳͚́ͅb̶̡̻̞̐̿ͅl̸̼͝ợ̷̜͓̒̏͜͝ẍ̴̝̦̟̰́̒́̌ block"), 85); +} + TEST_SUITE_END(); diff --git a/tests/TypeInfer.annotations.test.cpp b/tests/TypeInfer.annotations.test.cpp index 2c87cb41..3de52999 100644 --- a/tests/TypeInfer.annotations.test.cpp +++ b/tests/TypeInfer.annotations.test.cpp @@ -435,6 +435,10 @@ TEST_CASE_FIXTURE(Fixture, "typeof_expr") TEST_CASE_FIXTURE(Fixture, "corecursive_types_error_on_tight_loop") { + ScopedFastFlag flags[] = { + {"LuauOccursIsntAlwaysFailure", true}, + }; + CheckResult result = check(R"( type A = B type B = A @@ -443,10 +447,10 @@ TEST_CASE_FIXTURE(Fixture, "corecursive_types_error_on_tight_loop") local bb:B )"); - TypeId fType = requireType("aa"); - const AnyType* ftv = get(follow(fType)); - REQUIRE(ftv != nullptr); - REQUIRE(!result.errors.empty()); + LUAU_REQUIRE_ERROR_COUNT(1, result); + + OccursCheckFailed* ocf = get(result.errors[0]); + REQUIRE(ocf); } TEST_CASE_FIXTURE(Fixture, "type_alias_always_resolve_to_a_real_type") @@ -762,6 +766,7 @@ TEST_CASE_FIXTURE(Fixture, "occurs_check_on_cyclic_union_type") { CheckResult result = check(R"( type T = T | T + local x : T )"); LUAU_REQUIRE_ERROR_COUNT(1, result); diff --git a/tests/TypeInfer.functions.test.cpp b/tests/TypeInfer.functions.test.cpp index f1d42c6a..942ce191 100644 --- a/tests/TypeInfer.functions.test.cpp +++ b/tests/TypeInfer.functions.test.cpp @@ -1281,6 +1281,39 @@ f(function(x) return x * 2 end) LUAU_REQUIRE_NO_ERRORS(result); } +TEST_CASE_FIXTURE(Fixture, "variadic_any_is_compatible_with_a_generic_TypePack") +{ + ScopedFastFlag sff[] = { + 
{"LuauVariadicAnyCanBeGeneric", true} + }; + + CheckResult result = check(R"( + --!strict + local function f(...) return ... end + local g = function(...) return f(...) end + )"); + + LUAU_REQUIRE_NO_ERRORS(result); +} + +// https://github.com/Roblox/luau/issues/767 +TEST_CASE_FIXTURE(BuiltinsFixture, "variadic_any_is_compatible_with_a_generic_TypePack_2") +{ + ScopedFastFlag sff{"LuauVariadicAnyCanBeGeneric", true}; + + CheckResult result = check(R"( + local function somethingThatsAny(...: any) + print(...) + end + + local function x(...: T...) + somethingThatsAny(...) -- Failed to unify variadic type packs + end + )"); + + LUAU_REQUIRE_NO_ERRORS(result); +} + TEST_CASE_FIXTURE(Fixture, "infer_anonymous_function_arguments_outside_call") { CheckResult result = check(R"( diff --git a/tests/TypeInfer.operators.test.cpp b/tests/TypeInfer.operators.test.cpp index 174bc310..d224195c 100644 --- a/tests/TypeInfer.operators.test.cpp +++ b/tests/TypeInfer.operators.test.cpp @@ -53,10 +53,6 @@ TEST_CASE_FIXTURE(Fixture, "or_joins_types_with_no_superfluous_union") TEST_CASE_FIXTURE(Fixture, "and_does_not_always_add_boolean") { - ScopedFastFlag sff[]{ - {"LuauTryhardAnd", true}, - }; - CheckResult result = check(R"( local s = "a" and 10 local x:boolean|number = s @@ -737,6 +733,8 @@ TEST_CASE_FIXTURE(Fixture, "error_on_invalid_operand_types_to_relational_operato TEST_CASE_FIXTURE(Fixture, "cli_38355_recursive_union") { + ScopedFastFlag sff{"LuauOccursIsntAlwaysFailure", true}; + CheckResult result = check(R"( --!strict local _ @@ -744,7 +742,7 @@ TEST_CASE_FIXTURE(Fixture, "cli_38355_recursive_union") )"); LUAU_REQUIRE_ERROR_COUNT(1, result); - CHECK_EQ("Type contains a self-recursive construct that cannot be resolved", toString(result.errors[0])); + CHECK_EQ("Unknown type used in + operation; consider adding a type annotation to '_'", toString(result.errors[0])); } TEST_CASE_FIXTURE(BuiltinsFixture, "UnknownGlobalCompoundAssign") @@ -1048,10 +1046,6 @@ 
TEST_CASE_FIXTURE(BuiltinsFixture, "mm_comparisons_must_return_a_boolean") TEST_CASE_FIXTURE(BuiltinsFixture, "reworked_and") { - ScopedFastFlag sff[]{ - {"LuauTryhardAnd", true}, - }; - CheckResult result = check(R"( local a: number? = 5 local b: boolean = (a or 1) > 10 @@ -1077,10 +1071,6 @@ local w = c and 1 TEST_CASE_FIXTURE(BuiltinsFixture, "reworked_or") { - ScopedFastFlag sff[]{ - {"LuauTryhardAnd", true}, - }; - CheckResult result = check(R"( local a: number | false = 5 local b: number? = 6 @@ -1115,11 +1105,6 @@ local f1 = f or 'f' TEST_CASE_FIXTURE(BuiltinsFixture, "reducing_and") { - ScopedFastFlag sff[]{ - {"LuauTryhardAnd", true}, - {"LuauReducingAndOr", true}, - }; - CheckResult result = check(R"( type Foo = { name: string?, flag: boolean? } local arr: {Foo} = {} @@ -1137,4 +1122,61 @@ end LUAU_REQUIRE_NO_ERRORS(result); } +TEST_CASE_FIXTURE(BuiltinsFixture, "luau_polyfill_is_array_simplified") +{ + CheckResult result = check(R"( + --!strict + return function(value: any) : boolean + if typeof(value) ~= "number" then + return false + end + if value % 1 ~= 0 or value < 1 then + return false + end + return true + end + )"); + + LUAU_REQUIRE_NO_ERRORS(result); +} + +TEST_CASE_FIXTURE(BuiltinsFixture, "luau_polyfill_is_array") +{ + CheckResult result = check(R"( +--!strict +return function(value: any): boolean + if typeof(value) ~= "table" then + return false + end + if next(value) == nil then + -- an empty table is an empty array + return true + end + + local length = #value + + if length == 0 then + return false + end + + local count = 0 + local sum = 0 + for key in pairs(value) do + if typeof(key) ~= "number" then + return false + end + if key % 1 ~= 0 or key < 1 then + return false + end + count += 1 + sum += key + end + + return sum == (count * (count + 1) / 2) +end + )"); + + LUAU_REQUIRE_NO_ERRORS(result); +} + TEST_SUITE_END(); diff --git a/tests/TypeInfer.provisional.test.cpp b/tests/TypeInfer.provisional.test.cpp index 87419deb..e074bc87 100644 
--- a/tests/TypeInfer.provisional.test.cpp +++ b/tests/TypeInfer.provisional.test.cpp @@ -320,23 +320,6 @@ TEST_CASE_FIXTURE(Fixture, "weird_fail_to_unify_type_pack") LUAU_REQUIRE_ERRORS(result); // Should not have any errors. } -TEST_CASE_FIXTURE(Fixture, "weird_fail_to_unify_variadic_pack") -{ - ScopedFastFlag sff[] = { - // I'm not sure why this is broken without DCR, but it seems to be fixed - // when DCR is enabled. - {"DebugLuauDeferredConstraintResolution", false}, - }; - - CheckResult result = check(R"( - --!strict - local function f(...) return ... end - local g = function(...) return f(...) end - )"); - - LUAU_REQUIRE_ERRORS(result); // Should not have any errors. -} - // Belongs in TypeInfer.builtins.test.cpp. TEST_CASE_FIXTURE(BuiltinsFixture, "pcall_returns_at_least_two_value_but_function_returns_nothing") { @@ -819,4 +802,23 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "table_insert_with_a_singleton_argument") } } +// We really should be warning on this. We have no guarantee that T has any properties. 
+TEST_CASE_FIXTURE(Fixture, "lookup_prop_of_intersection_containing_unions_of_tables_that_have_the_prop") +{ + CheckResult result = check(R"( + local function mergeOptions(options: T & ({variable: string} | {variable: number})) + return options.variable + end + )"); + + LUAU_REQUIRE_NO_ERRORS(result); + + // LUAU_REQUIRE_ERROR_COUNT(1, result); + + // const UnknownProperty* unknownProp = get(result.errors[0]); + // REQUIRE(unknownProp); + + // CHECK("variable" == unknownProp->key); +} + TEST_SUITE_END(); diff --git a/tests/TypeInfer.test.cpp b/tests/TypeInfer.test.cpp index 3088235a..f540be07 100644 --- a/tests/TypeInfer.test.cpp +++ b/tests/TypeInfer.test.cpp @@ -1195,6 +1195,21 @@ local b = typeof(foo) ~= 'nil' CHECK(toString(result.errors[1]) == "Unknown global 'foo'"); } +TEST_CASE_FIXTURE(Fixture, "occurs_isnt_always_failure") +{ + ScopedFastFlag sff{"LuauOccursIsntAlwaysFailure", true}; + + CheckResult result = check(R"( +function f(x, c) -- x : X + local y = if c then x else nil -- y : X? + local z = if c then x else nil -- z : X? 
+ y = z +end + )"); + + LUAU_REQUIRE_NO_ERRORS(result); +} + TEST_CASE_FIXTURE(Fixture, "dcr_delays_expansion_of_function_containing_blocked_parameter_type") { ScopedFastFlag sff[] = { diff --git a/tests/TypeInfer.unionTypes.test.cpp b/tests/TypeInfer.unionTypes.test.cpp index 19a19e45..19b22148 100644 --- a/tests/TypeInfer.unionTypes.test.cpp +++ b/tests/TypeInfer.unionTypes.test.cpp @@ -776,4 +776,20 @@ TEST_CASE_FIXTURE(Fixture, "generic_function_with_optional_arg") LUAU_REQUIRE_NO_ERRORS(result); } +TEST_CASE_FIXTURE(Fixture, "lookup_prop_of_intersection_containing_unions") +{ + CheckResult result = check(R"( + local function mergeOptions(options: T & ({} | {})) + return options.variables + end + )"); + + LUAU_REQUIRE_ERROR_COUNT(1, result); + + const UnknownProperty* unknownProp = get(result.errors[0]); + REQUIRE(unknownProp); + + CHECK("variables" == unknownProp->key); +} + TEST_SUITE_END(); diff --git a/tests/TypeInfer.unknownnever.test.cpp b/tests/TypeInfer.unknownnever.test.cpp index 410fd52d..8558670c 100644 --- a/tests/TypeInfer.unknownnever.test.cpp +++ b/tests/TypeInfer.unknownnever.test.cpp @@ -301,11 +301,6 @@ TEST_CASE_FIXTURE(Fixture, "length_of_never") TEST_CASE_FIXTURE(Fixture, "dont_unify_operands_if_one_of_the_operand_is_never_in_any_ordering_operators") { - ScopedFastFlag sff[]{ - {"LuauTryhardAnd", true}, - {"LuauReducingAndOr", true}, - }; - CheckResult result = check(R"( local function ord(x: nil, y) return x ~= nil and x > y diff --git a/tests/TypeVar.test.cpp b/tests/TypeVar.test.cpp index 3f0becc5..dbf58cc8 100644 --- a/tests/TypeVar.test.cpp +++ b/tests/TypeVar.test.cpp @@ -273,12 +273,14 @@ TEST_CASE_FIXTURE(Fixture, "substitution_skip_failure") TypeId root = &ttvTweenResult; - frontend.typeChecker.currentModule = std::make_shared(); - frontend.typeChecker.currentModule->scopes.emplace_back(Location{}, std::make_shared(builtinTypes->anyTypePack)); + ModulePtr currentModule = std::make_shared(); + Anyification 
anyification(&currentModule->internalTypes, frontend.globals.globalScope, builtinTypes, &frontend.iceHandler, builtinTypes->anyType, + builtinTypes->anyTypePack); + std::optional any = anyification.substitute(root); - TypeId result = frontend.typeChecker.anyify(frontend.globals.globalScope, root, Location{}); - - CHECK_EQ("{| f: t1 |} where t1 = () -> {| f: () -> {| f: ({| f: t1 |}) -> (), signal: {| f: (any) -> () |} |} |}", toString(result)); + REQUIRE(!anyification.normalizationTooComplex); + REQUIRE(any.has_value()); + CHECK_EQ("{| f: t1 |} where t1 = () -> {| f: () -> {| f: ({| f: t1 |}) -> (), signal: {| f: (any) -> () |} |} |}", toString(*any)); } TEST_CASE("tagging_tables") diff --git a/tests/conformance/math.lua b/tests/conformance/math.lua index ea3b5c87..47342730 100644 --- a/tests/conformance/math.lua +++ b/tests/conformance/math.lua @@ -347,5 +347,15 @@ assert(select('#', math.ceil(1.6)) == 1) assert(select('#', math.sqrt(9)) == 1) assert(select('#', math.deg(9)) == 1) assert(select('#', math.rad(9)) == 1) +assert(select('#', math.sin(1.5)) == 1) +assert(select('#', math.atan2(1.5, 0.5)) == 1) +assert(select('#', math.modf(1.5)) == 2) +assert(select('#', math.frexp(1.5)) == 2) + +-- test that fastcalls that return variadic results return them correctly in variadic position +assert(select(1, math.modf(1.5)) == 1) +assert(select(2, math.modf(1.5)) == 0.5) +assert(select(1, math.frexp(1.5)) == 0.75) +assert(select(2, math.frexp(1.5)) == 1) return('OK') diff --git a/tests/conformance/tables.lua b/tests/conformance/tables.lua index 596eed3d..03b46396 100644 --- a/tests/conformance/tables.lua +++ b/tests/conformance/tables.lua @@ -715,4 +715,11 @@ do end end +-- check that fast path for table lookup can't be tricked into assuming a light user data with string pointer is a string +assert((function () + local t = {} + t[makelud("hi")] = "no" + return t.hi +end)() == nil) + return"OK" diff --git a/tools/lvmexecute_split.py b/tools/lvmexecute_split.py index
16de45dc..6e64bcd0 100644 --- a/tools/lvmexecute_split.py +++ b/tools/lvmexecute_split.py @@ -34,7 +34,7 @@ source = """// This file is part of the Luau programming language and is license function = "" signature = "" -includeInsts = ["LOP_NEWCLOSURE", "LOP_NAMECALL", "LOP_FORGPREP", "LOP_GETVARARGS", "LOP_DUPCLOSURE", "LOP_PREPVARARGS", "LOP_BREAK", "LOP_GETGLOBAL", "LOP_SETGLOBAL", "LOP_GETTABLEKS", "LOP_SETTABLEKS"] +includeInsts = ["LOP_NEWCLOSURE", "LOP_NAMECALL", "LOP_FORGPREP", "LOP_GETVARARGS", "LOP_DUPCLOSURE", "LOP_PREPVARARGS", "LOP_BREAK", "LOP_GETGLOBAL", "LOP_SETGLOBAL", "LOP_GETTABLEKS", "LOP_SETTABLEKS", "LOP_SETLIST"] state = 0