Mirror of https://github.com/luau-lang/luau.git, synced 2024-11-15 22:35:43 +08:00

Commit 80928acb92:

Instead of patching the tag component with TVECTOR in every instruction that produces a vector value, we now use a separate IR instruction (TAG_VECTOR) to do this. This reduces implementation redundancy, but more importantly enables a class of optimizations:

- NUM_TO_VECTOR previously patched the tag component unconditionally, but the result was only used in MUL/DIV_VEC instructions that ignore it anyway; we can now remove that patch.
- ADD_VEC et al. can now forward the source of a TAG_VECTOR instruction on either input; this shortens the latency chain, and in the future could let us generate an optimal vector instruction sequence once the temporary stores are marked as dead (see the sketch below).
- In the future on X64, ADD_VEC et al. will be able to analyze the input instruction and remove tag masking conditionally. This is not part of this PR, as it requires a decision about the expected FP environment and/or whether the existing masking is necessary to begin with.

I've also renamed NUM_TO_VECTOR to NUM_TO_VEC so that "VEC" always refers to "3 float values", for consistency with ADD_VEC and friends.

Note: ADD_VEC input forwarding is currently performed unconditionally; it may or may not increase the number of spills that can't be reloaded from the stack.

On A64, this makes the Taylor series computation a tiny bit faster (11.3 ns => 11.0 ns) by removing the redundant "ins" instructions along the NUM_TO_VEC path. Curiously, forwarding the TAG_VECTOR input to arithmetic instructions actually carries a small penalty: without it, this PR runs at 10.9 ns. I don't know whether this is a property of the benchmark, though; I just noticed that in this benchmark type inference fails to infer parts of the computation as a vector op. If desired, I will happily omit this part of the change and we can explore it separately.
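A minimal sketch of the TAG_VECTOR forwarding described above (illustrative only; it assumes the IrFunction::instOp accessor from IrData.h and the replace helper declared in IrUtils.h below — the actual pass in this commit may be structured differently):

// Vector arithmetic ignores the tag byte of its inputs, so an input produced
// by TAG_VECTOR can be replaced with that instruction's untagged source,
// shortening the dependency chain.
static void forwardTagVectorInput(IrFunction& function, IrOp& input)
{
    if (input.kind == IrOpKind::Inst && function.instOp(input).cmd == IrCmd::TAG_VECTOR)
        replace(function, input, function.instOp(input).a);
}

Applied to both operands of ADD_VEC/SUB_VEC/MUL_VEC/DIV_VEC (and the operand of UNM_VEC), this lets the arithmetic consume the untagged value directly instead of waiting for the tag patch.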
295 lines | 7.8 KiB | C++
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/Bytecode.h"
#include "Luau/Common.h"
#include "Luau/IrData.h"

namespace Luau
{
namespace CodeGen
{

struct IrBuilder;

inline bool isJumpD(LuauOpcode op)
{
    switch (op)
    {
    case LOP_JUMP:
    case LOP_JUMPIF:
    case LOP_JUMPIFNOT:
    case LOP_JUMPIFEQ:
    case LOP_JUMPIFLE:
    case LOP_JUMPIFLT:
    case LOP_JUMPIFNOTEQ:
    case LOP_JUMPIFNOTLE:
    case LOP_JUMPIFNOTLT:
    case LOP_FORNPREP:
    case LOP_FORNLOOP:
    case LOP_FORGPREP:
    case LOP_FORGLOOP:
    case LOP_FORGPREP_INEXT:
    case LOP_FORGPREP_NEXT:
    case LOP_JUMPBACK:
    case LOP_JUMPXEQKNIL:
    case LOP_JUMPXEQKB:
    case LOP_JUMPXEQKN:
    case LOP_JUMPXEQKS:
        return true;

    default:
        return false;
    }
}

inline bool isSkipC(LuauOpcode op)
{
    switch (op)
    {
    case LOP_LOADB:
        return true;

    default:
        return false;
    }
}

inline bool isFastCall(LuauOpcode op)
{
    switch (op)
    {
    case LOP_FASTCALL:
    case LOP_FASTCALL1:
    case LOP_FASTCALL2:
    case LOP_FASTCALL2K:
        return true;

    default:
        return false;
    }
}

inline int getJumpTarget(uint32_t insn, uint32_t pc)
{
    LuauOpcode op = LuauOpcode(LUAU_INSN_OP(insn));

    if (isJumpD(op))
        return int(pc + LUAU_INSN_D(insn) + 1);
    else if (isFastCall(op))
        return int(pc + LUAU_INSN_C(insn) + 2);
    else if (isSkipC(op) && LUAU_INSN_C(insn))
        return int(pc + LUAU_INSN_C(insn) + 1);
    else if (op == LOP_JUMPX)
        return int(pc + LUAU_INSN_E(insn) + 1);
    else
        return -1;
}
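
// Usage sketch (added example, not part of the original header): collecting
// branch targets while scanning a proto's bytecode. 'insns'/'count' stand in
// for the instruction stream; getOpLength is assumed to come from
// "Luau/BytecodeUtils.h" so multi-word instructions are skipped correctly.
inline void collectJumpTargets(const uint32_t* insns, uint32_t count, std::vector<int>& targets)
{
    for (uint32_t pc = 0; pc < count; pc += getOpLength(LuauOpcode(LUAU_INSN_OP(insns[pc]))))
    {
        // getJumpTarget returns -1 for instructions that don't branch
        if (int target = getJumpTarget(insns[pc], pc); target >= 0)
            targets.push_back(target);
    }
}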

inline bool isBlockTerminator(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::JUMP:
    case IrCmd::JUMP_IF_TRUTHY:
    case IrCmd::JUMP_IF_FALSY:
    case IrCmd::JUMP_EQ_TAG:
    case IrCmd::JUMP_CMP_INT:
    case IrCmd::JUMP_EQ_POINTER:
    case IrCmd::JUMP_CMP_NUM:
    case IrCmd::JUMP_FORN_LOOP_COND:
    case IrCmd::JUMP_SLOT_MATCH:
    case IrCmd::RETURN:
    case IrCmd::FORGLOOP:
    case IrCmd::FORGLOOP_FALLBACK:
    case IrCmd::FORGPREP_XNEXT_FALLBACK:
    case IrCmd::FALLBACK_FORGPREP:
        return true;
    default:
        break;
    }

    return false;
}
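
// Example invariant (added sketch, not part of the original header): a
// completed block is expected to end in a terminator. Assumes IrBlock::finish
// indexes the block's last instruction, as in IrData.h.
inline void checkBlockTerminator(IrFunction& function, IrBlock& block)
{
    LUAU_ASSERT(isBlockTerminator(function.instructions[block.finish].cmd));
}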

inline bool isNonTerminatingJump(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::TRY_NUM_TO_INDEX:
    case IrCmd::TRY_CALL_FASTGETTM:
    case IrCmd::CHECK_FASTCALL_RES:
    case IrCmd::CHECK_TAG:
    case IrCmd::CHECK_TRUTHY:
    case IrCmd::CHECK_READONLY:
    case IrCmd::CHECK_NO_METATABLE:
    case IrCmd::CHECK_SAFE_ENV:
    case IrCmd::CHECK_ARRAY_SIZE:
    case IrCmd::CHECK_SLOT_MATCH:
    case IrCmd::CHECK_NODE_NO_NEXT:
    case IrCmd::CHECK_NODE_VALUE:
    case IrCmd::CHECK_BUFFER_LEN:
        return true;
    default:
        break;
    }

    return false;
}

inline bool hasResult(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::LOAD_TAG:
    case IrCmd::LOAD_POINTER:
    case IrCmd::LOAD_DOUBLE:
    case IrCmd::LOAD_INT:
    case IrCmd::LOAD_FLOAT:
    case IrCmd::LOAD_TVALUE:
    case IrCmd::LOAD_ENV:
    case IrCmd::GET_ARR_ADDR:
    case IrCmd::GET_SLOT_NODE_ADDR:
    case IrCmd::GET_HASH_NODE_ADDR:
    case IrCmd::GET_CLOSURE_UPVAL_ADDR:
    case IrCmd::ADD_INT:
    case IrCmd::SUB_INT:
    case IrCmd::ADD_NUM:
    case IrCmd::SUB_NUM:
    case IrCmd::MUL_NUM:
    case IrCmd::DIV_NUM:
    case IrCmd::IDIV_NUM:
    case IrCmd::MOD_NUM:
    case IrCmd::MIN_NUM:
    case IrCmd::MAX_NUM:
    case IrCmd::UNM_NUM:
    case IrCmd::FLOOR_NUM:
    case IrCmd::CEIL_NUM:
    case IrCmd::ROUND_NUM:
    case IrCmd::SQRT_NUM:
    case IrCmd::ABS_NUM:
    case IrCmd::ADD_VEC:
    case IrCmd::SUB_VEC:
    case IrCmd::MUL_VEC:
    case IrCmd::DIV_VEC:
    case IrCmd::UNM_VEC:
    case IrCmd::NOT_ANY:
    case IrCmd::CMP_ANY:
    case IrCmd::TABLE_LEN:
    case IrCmd::TABLE_SETNUM:
    case IrCmd::STRING_LEN:
    case IrCmd::NEW_TABLE:
    case IrCmd::DUP_TABLE:
    case IrCmd::TRY_NUM_TO_INDEX:
    case IrCmd::TRY_CALL_FASTGETTM:
    case IrCmd::INT_TO_NUM:
    case IrCmd::UINT_TO_NUM:
    case IrCmd::NUM_TO_INT:
    case IrCmd::NUM_TO_UINT:
    case IrCmd::NUM_TO_VEC:
    case IrCmd::TAG_VECTOR:
    case IrCmd::SUBSTITUTE:
    case IrCmd::INVOKE_FASTCALL:
    case IrCmd::BITAND_UINT:
    case IrCmd::BITXOR_UINT:
    case IrCmd::BITOR_UINT:
    case IrCmd::BITNOT_UINT:
    case IrCmd::BITLSHIFT_UINT:
    case IrCmd::BITRSHIFT_UINT:
    case IrCmd::BITARSHIFT_UINT:
    case IrCmd::BITLROTATE_UINT:
    case IrCmd::BITRROTATE_UINT:
    case IrCmd::BITCOUNTLZ_UINT:
    case IrCmd::BITCOUNTRZ_UINT:
    case IrCmd::INVOKE_LIBM:
    case IrCmd::GET_TYPE:
    case IrCmd::GET_TYPEOF:
    case IrCmd::NEWCLOSURE:
    case IrCmd::FINDUPVAL:
    case IrCmd::BUFFER_READI8:
    case IrCmd::BUFFER_READU8:
    case IrCmd::BUFFER_READI16:
    case IrCmd::BUFFER_READU16:
    case IrCmd::BUFFER_READI32:
    case IrCmd::BUFFER_READF32:
    case IrCmd::BUFFER_READF64:
        return true;
    default:
        break;
    }

    return false;
}

inline bool hasSideEffects(IrCmd cmd)
{
    if (cmd == IrCmd::INVOKE_FASTCALL)
        return true;

    // Instructions that don't produce a result most likely have other side-effects to make them useful
    // Right now, a full switch would mirror the 'hasResult' function, so we use this simple condition
    return !hasResult(cmd);
}

inline bool isPseudo(IrCmd cmd)
{
    // Instructions that are used for internal needs and are not a part of final lowering
    return cmd == IrCmd::NOP || cmd == IrCmd::SUBSTITUTE;
}
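
// Example (added sketch, not part of the original header): how these
// predicates might combine in a dead-code test; real passes also update use
// counts as operands are replaced. Assumes IrInst::useCount from IrData.h.
inline bool isRemovableIfUnused(const IrInst& inst)
{
    return inst.useCount == 0 && !hasSideEffects(inst.cmd) && !isPseudo(inst.cmd);
}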

IrValueKind getCmdValueKind(IrCmd cmd);

bool isGCO(uint8_t tag);

// Manually add or remove use of an operand
void addUse(IrFunction& function, IrOp op);
void removeUse(IrFunction& function, IrOp op);

// Remove a single instruction
void kill(IrFunction& function, IrInst& inst);

// Remove a range of instructions
void kill(IrFunction& function, uint32_t start, uint32_t end);

// Remove a block, including all instructions inside
void kill(IrFunction& function, IrBlock& block);

// Replace a single operand and update use counts (can cause chain removal of dead code)
void replace(IrFunction& function, IrOp& original, IrOp replacement);

// Replace a single instruction
// Target instruction index instead of reference is used to handle introduction of a new block terminator
void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement);

// Replace instruction with a different value (using IrCmd::SUBSTITUTE)
void substitute(IrFunction& function, IrInst& inst, IrOp replacement);

// Replace instruction arguments that point to substitutions with target values
void applySubstitutions(IrFunction& function, IrOp& op);
void applySubstitutions(IrFunction& function, IrInst& inst);
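
// Usage sketch (added comment, not part of the original header): a typical
// substitution flow in an optimization pass, with 'build' as the IrBuilder:
//
//   substitute(function, inst, build.constDouble(1.0)); // inst becomes IrCmd::SUBSTITUTE
//   applySubstitutions(function, user);                 // user's operands now reference the constant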

// Compare numbers using IR condition value
bool compare(double a, double b, IrCondition cond);

// Perform constant folding on instruction at index
// For most instructions, successful folding results in an IrCmd::SUBSTITUTE
// But it can also be successful on conditional control-flow, replacing it with an unconditional IrCmd::JUMP
void foldConstants(IrBuilder& build, IrFunction& function, IrBlock& block, uint32_t instIdx);
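
// Usage sketch (added example, not part of the original header): folding every
// instruction in a block, assuming IrBlock::start/finish bound its instructions.
inline void foldBlockConstants(IrBuilder& build, IrFunction& function, IrBlock& block)
{
    for (uint32_t idx = block.start; idx <= block.finish; idx++)
        foldConstants(build, function, block, idx);
}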

uint32_t getNativeContextOffset(int bfid);

// Cleans up blocks that were created with no users
void killUnusedBlocks(IrFunction& function);

// Get blocks in order that tries to maximize fallthrough between them during lowering
// We want to mostly preserve build order with fallbacks outlined
// But we also use hints from optimization passes that chain blocks together where there's only one out-in edge between them
std::vector<uint32_t> getSortedBlockOrder(IrFunction& function);

// Returns first non-dead block that comes after block at index 'i' in the sorted blocks array
// 'dummy' block is returned if the end of array was reached
IrBlock& getNextBlock(IrFunction& function, const std::vector<uint32_t>& sortedBlocks, IrBlock& dummy, size_t i);
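
// Usage sketch (added comment, not part of the original header) of the
// lowering loop shape these helpers support, with IrFunction::blocks from IrData.h:
//
//   std::vector<uint32_t> sortedBlocks = getSortedBlockOrder(function);
//   IrBlock dummy{};
//
//   for (size_t i = 0; i < sortedBlocks.size(); i++)
//   {
//       IrBlock& block = function.blocks[sortedBlocks[i]];
//       IrBlock& next = getNextBlock(function, sortedBlocks, dummy, i);
//       // ...lower 'block', eliding its final jump when it falls through to 'next'
//   }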

} // namespace CodeGen
} // namespace Luau