luau/CodeGen/include/Luau/IrVisitUseDef.h

231 lines
7.0 KiB
C
Raw Normal View History

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include "Luau/IrData.h"
namespace Luau
{
namespace CodeGen
{
template<typename T>
static void visitVmRegDefsUses(T& visitor, IrFunction& function, const IrInst& inst)
{
// For correct analysis, all instruction uses must be handled before handling the definitions
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
visitor.maybeUse(inst.a); // Argument can also be a VmConst
break;
case IrCmd::STORE_TAG:
case IrCmd::STORE_EXTRA:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_VECTOR:
case IrCmd::STORE_TVALUE:
case IrCmd::STORE_SPLIT_TVALUE:
visitor.maybeDef(inst.a); // Argument can also be a pointer value
break;
case IrCmd::CMP_ANY:
visitor.use(inst.a);
visitor.use(inst.b);
break;
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
visitor.use(inst.a);
break;
// A <- B, C
case IrCmd::DO_ARITH:
Add SUBRK and DIVRK bytecode instructions to bytecode v5 (#1115) Right now, we can compile R\*K for all arithmetic instructions, but K\*R gets compiled into two instructions (LOADN/LOADK + arithmetic opcode). This is problematic since it leads to reduced performance for some code. However, we'd like to avoid adding reverse variants of ADDK et al for all opcodes to avoid the increase in I$ footprint for interpreter. Looking at the arithmetic instructions, % and // don't have interesting use cases for K\*V; ^ is sometimes used with constant on the left hand side but this would need to call pow() by necessity in all cases so it would be slow regardless of the dispatch overhead. This leaves the four basic arithmetic operations. For + and \*, we can implement a compiler-side optimization in the future that transforms K\*R to R\*K automatically. This could either be done unconditionally at -O2, or conditionally based on the type of the value (driven by type annotations / inference) -- this technically changes behavior in presence of metamethods, although it might be sensible to just always do this because non-commutative +/* are evil. However, for - and / it is impossible for the compiler to optimize this in the future, so we need dedicated opcodes. This only increases the interpreter size by ~300 bytes (~1.5%) on X64. This makes spectral-norm and math-partial-sums 6% faster; maybe more importantly, voxelgen gets 1.5% faster (so this change does have real-world impact). To avoid the proliferation of bytecode versions this change piggybacks on the bytecode version bump that was just made in 604 for vector constants; we would still be able to enable these independently but we'll consider v5 complete when both are enabled. Related: #626 --------- Co-authored-by: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com>
2023-11-28 23:35:01 +08:00
visitor.maybeUse(inst.b); // Argument can also be a VmConst
visitor.maybeUse(inst.c); // Argument can also be a VmConst
visitor.def(inst.a);
break;
case IrCmd::GET_TABLE:
visitor.use(inst.b);
visitor.maybeUse(inst.c); // Argument can also be a VmConst
visitor.def(inst.a);
break;
case IrCmd::SET_TABLE:
visitor.use(inst.a);
visitor.use(inst.b);
visitor.maybeUse(inst.c); // Argument can also be a VmConst
break;
// A <- B
case IrCmd::DO_LEN:
visitor.use(inst.b);
visitor.def(inst.a);
break;
case IrCmd::GET_IMPORT:
visitor.def(inst.a);
break;
case IrCmd::CONCAT:
visitor.useRange(vmRegOp(inst.a), function.uintOp(inst.b));
visitor.defRange(vmRegOp(inst.a), function.uintOp(inst.b));
break;
case IrCmd::GET_UPVALUE:
visitor.def(inst.a);
break;
case IrCmd::SET_UPVALUE:
visitor.use(inst.b);
break;
case IrCmd::INTERRUPT:
break;
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_FORWARD:
visitor.maybeUse(inst.b);
break;
case IrCmd::CLOSE_UPVALS:
// Closing an upvalue should be counted as a register use (it copies the fresh register value)
// But we lack the required information about the specific set of registers that are affected
// Because we don't plan to optimize captured registers atm, we skip full dataflow analysis for them right now
break;
case IrCmd::CAPTURE:
visitor.maybeUse(inst.a);
if (function.uintOp(inst.b) == 1)
visitor.capture(vmRegOp(inst.a));
break;
case IrCmd::SETLIST:
visitor.use(inst.b);
visitor.useRange(vmRegOp(inst.c), function.intOp(inst.d));
break;
case IrCmd::CALL:
visitor.use(inst.a);
visitor.useRange(vmRegOp(inst.a) + 1, function.intOp(inst.b));
visitor.defRange(vmRegOp(inst.a), function.intOp(inst.c));
break;
case IrCmd::RETURN:
visitor.useRange(vmRegOp(inst.a), function.intOp(inst.b));
break;
// TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it
case IrCmd::FASTCALL:
case IrCmd::INVOKE_FASTCALL:
if (int count = function.intOp(inst.e); count != -1)
{
if (count >= 3)
{
LUAU_ASSERT(inst.d.kind == IrOpKind::VmReg && vmRegOp(inst.d) == vmRegOp(inst.c) + 1);
visitor.useRange(vmRegOp(inst.c), count);
}
else
{
if (count >= 1)
visitor.use(inst.c);
if (count >= 2)
visitor.maybeUse(inst.d); // Argument can also be a VmConst
}
}
else
{
visitor.useVarargs(vmRegOp(inst.c));
}
// Multiple return sequences (count == -1) are defined by ADJUST_STACK_TO_REG
if (int count = function.intOp(inst.f); count != -1)
visitor.defRange(vmRegOp(inst.b), count);
break;
case IrCmd::FORGLOOP:
// First register is not used by instruction, we check that it's still 'nil' with CHECK_TAG
visitor.use(inst.a, 1);
visitor.use(inst.a, 2);
visitor.def(inst.a, 2);
visitor.defRange(vmRegOp(inst.a) + 3, function.intOp(inst.b));
break;
case IrCmd::FORGLOOP_FALLBACK:
visitor.useRange(vmRegOp(inst.a), 3);
visitor.def(inst.a, 2);
visitor.defRange(vmRegOp(inst.a) + 3, uint8_t(function.intOp(inst.b))); // ignore most significant bit
break;
case IrCmd::FORGPREP_XNEXT_FALLBACK:
visitor.use(inst.b);
break;
case IrCmd::FALLBACK_GETGLOBAL:
visitor.def(inst.b);
break;
case IrCmd::FALLBACK_SETGLOBAL:
visitor.use(inst.b);
break;
case IrCmd::FALLBACK_GETTABLEKS:
visitor.use(inst.c);
visitor.def(inst.b);
break;
case IrCmd::FALLBACK_SETTABLEKS:
visitor.use(inst.b);
visitor.use(inst.c);
break;
case IrCmd::FALLBACK_NAMECALL:
visitor.use(inst.c);
visitor.defRange(vmRegOp(inst.b), 2);
break;
case IrCmd::FALLBACK_PREPVARARGS:
// No effect on explicitly referenced registers
break;
case IrCmd::FALLBACK_GETVARARGS:
visitor.defRange(vmRegOp(inst.b), function.intOp(inst.c));
break;
case IrCmd::FALLBACK_DUPCLOSURE:
visitor.def(inst.b);
break;
case IrCmd::FALLBACK_FORGPREP:
visitor.use(inst.b);
visitor.defRange(vmRegOp(inst.b), 3);
break;
case IrCmd::ADJUST_STACK_TO_REG:
visitor.defRange(vmRegOp(inst.a), -1);
break;
case IrCmd::ADJUST_STACK_TO_TOP:
// While this can be considered to be a vararg consumer, it is already handled in fastcall instructions
break;
case IrCmd::GET_TYPEOF:
visitor.use(inst.a);
break;
case IrCmd::FINDUPVAL:
visitor.use(inst.a);
break;
default:
// All instructions which reference registers have to be handled explicitly
LUAU_ASSERT(inst.a.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.b.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.e.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.f.kind != IrOpKind::VmReg);
break;
}
}
template<typename T>
static void visitVmRegDefsUses(T& visitor, IrFunction& function, const IrBlock& block)
{
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
IrInst& inst = function.instructions[instIdx];
visitVmRegDefsUses(visitor, function, inst);
}
}
} // namespace CodeGen
} // namespace Luau