luau/CodeGen/include/Luau/IrVisitUseDef.h
Arseny Kapoulkine 89b437bb4e
Add SUBRK and DIVRK bytecode instructions to bytecode v5 (#1115)
Right now, we can compile R\*K for all arithmetic instructions, but K\*R
gets compiled into two instructions (LOADN/LOADK + arithmetic opcode).

This is problematic since it leads to reduced performance for some code.
However, we'd like to avoid adding reverse variants of ADDK et al for
all opcodes to avoid the increase in I$ footprint for interpreter.

Looking at the arithmetic instructions, % and // don't have interesting
use cases for K\*V; ^ is sometimes used with constant on the left hand
side but this would need to call pow() by necessity in all cases so it
would be slow regardless of the dispatch overhead. This leaves the four
basic arithmetic operations.

For + and \*, we can implement a compiler-side optimization in the
future that transforms K\*R to R\*K automatically. This could either be
done unconditionally at -O2, or conditionally based on the type of the
value (driven by type annotations / inference) -- this technically
changes behavior in presence of metamethods, although it might be
sensible to just always do this because non-commutative +/* are evil.

However, for - and / it is impossible for the compiler to optimize this
in the future, so we need dedicated opcodes. This only increases the
interpreter size by ~300 bytes (~1.5%) on X64.

This makes spectral-norm and math-partial-sums 6% faster; maybe more
importantly, voxelgen gets 1.5% faster (so this change does have
real-world impact).

To avoid the proliferation of bytecode versions this change piggybacks
on the bytecode version bump that was just made in 604 for vector
constants; we would still be able to enable these independently but
we'll consider v5 complete when both are enabled.

Related: #626

---------

Co-authored-by: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com>
2023-11-28 07:35:01 -08:00

230 lines
7.0 KiB
C++

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include "Luau/IrData.h"
namespace Luau
{
namespace CodeGen
{
template<typename T>
static void visitVmRegDefsUses(T& visitor, IrFunction& function, const IrInst& inst)
{
// For correct analysis, all instruction uses must be handled before handling the definitions
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
visitor.maybeUse(inst.a); // Argument can also be a VmConst
break;
case IrCmd::STORE_TAG:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_VECTOR:
case IrCmd::STORE_TVALUE:
case IrCmd::STORE_SPLIT_TVALUE:
visitor.maybeDef(inst.a); // Argument can also be a pointer value
break;
case IrCmd::CMP_ANY:
visitor.use(inst.a);
visitor.use(inst.b);
break;
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
visitor.use(inst.a);
break;
// A <- B, C
case IrCmd::DO_ARITH:
visitor.maybeUse(inst.b); // Argument can also be a VmConst
visitor.maybeUse(inst.c); // Argument can also be a VmConst
visitor.def(inst.a);
break;
case IrCmd::GET_TABLE:
visitor.use(inst.b);
visitor.maybeUse(inst.c); // Argument can also be a VmConst
visitor.def(inst.a);
break;
case IrCmd::SET_TABLE:
visitor.use(inst.a);
visitor.use(inst.b);
visitor.maybeUse(inst.c); // Argument can also be a VmConst
break;
// A <- B
case IrCmd::DO_LEN:
visitor.use(inst.b);
visitor.def(inst.a);
break;
case IrCmd::GET_IMPORT:
visitor.def(inst.a);
break;
case IrCmd::CONCAT:
visitor.useRange(vmRegOp(inst.a), function.uintOp(inst.b));
visitor.defRange(vmRegOp(inst.a), function.uintOp(inst.b));
break;
case IrCmd::GET_UPVALUE:
visitor.def(inst.a);
break;
case IrCmd::SET_UPVALUE:
visitor.use(inst.b);
break;
case IrCmd::INTERRUPT:
break;
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_FORWARD:
visitor.maybeUse(inst.b);
break;
case IrCmd::CLOSE_UPVALS:
// Closing an upvalue should be counted as a register use (it copies the fresh register value)
// But we lack the required information about the specific set of registers that are affected
// Because we don't plan to optimize captured registers atm, we skip full dataflow analysis for them right now
break;
case IrCmd::CAPTURE:
visitor.maybeUse(inst.a);
if (function.uintOp(inst.b) == 1)
visitor.capture(vmRegOp(inst.a));
break;
case IrCmd::SETLIST:
visitor.use(inst.b);
visitor.useRange(vmRegOp(inst.c), function.intOp(inst.d));
break;
case IrCmd::CALL:
visitor.use(inst.a);
visitor.useRange(vmRegOp(inst.a) + 1, function.intOp(inst.b));
visitor.defRange(vmRegOp(inst.a), function.intOp(inst.c));
break;
case IrCmd::RETURN:
visitor.useRange(vmRegOp(inst.a), function.intOp(inst.b));
break;
// TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it
case IrCmd::FASTCALL:
case IrCmd::INVOKE_FASTCALL:
if (int count = function.intOp(inst.e); count != -1)
{
if (count >= 3)
{
LUAU_ASSERT(inst.d.kind == IrOpKind::VmReg && vmRegOp(inst.d) == vmRegOp(inst.c) + 1);
visitor.useRange(vmRegOp(inst.c), count);
}
else
{
if (count >= 1)
visitor.use(inst.c);
if (count >= 2)
visitor.maybeUse(inst.d); // Argument can also be a VmConst
}
}
else
{
visitor.useVarargs(vmRegOp(inst.c));
}
// Multiple return sequences (count == -1) are defined by ADJUST_STACK_TO_REG
if (int count = function.intOp(inst.f); count != -1)
visitor.defRange(vmRegOp(inst.b), count);
break;
case IrCmd::FORGLOOP:
// First register is not used by instruction, we check that it's still 'nil' with CHECK_TAG
visitor.use(inst.a, 1);
visitor.use(inst.a, 2);
visitor.def(inst.a, 2);
visitor.defRange(vmRegOp(inst.a) + 3, function.intOp(inst.b));
break;
case IrCmd::FORGLOOP_FALLBACK:
visitor.useRange(vmRegOp(inst.a), 3);
visitor.def(inst.a, 2);
visitor.defRange(vmRegOp(inst.a) + 3, uint8_t(function.intOp(inst.b))); // ignore most significant bit
break;
case IrCmd::FORGPREP_XNEXT_FALLBACK:
visitor.use(inst.b);
break;
case IrCmd::FALLBACK_GETGLOBAL:
visitor.def(inst.b);
break;
case IrCmd::FALLBACK_SETGLOBAL:
visitor.use(inst.b);
break;
case IrCmd::FALLBACK_GETTABLEKS:
visitor.use(inst.c);
visitor.def(inst.b);
break;
case IrCmd::FALLBACK_SETTABLEKS:
visitor.use(inst.b);
visitor.use(inst.c);
break;
case IrCmd::FALLBACK_NAMECALL:
visitor.use(inst.c);
visitor.defRange(vmRegOp(inst.b), 2);
break;
case IrCmd::FALLBACK_PREPVARARGS:
// No effect on explicitly referenced registers
break;
case IrCmd::FALLBACK_GETVARARGS:
visitor.defRange(vmRegOp(inst.b), function.intOp(inst.c));
break;
case IrCmd::FALLBACK_DUPCLOSURE:
visitor.def(inst.b);
break;
case IrCmd::FALLBACK_FORGPREP:
visitor.use(inst.b);
visitor.defRange(vmRegOp(inst.b), 3);
break;
case IrCmd::ADJUST_STACK_TO_REG:
visitor.defRange(vmRegOp(inst.a), -1);
break;
case IrCmd::ADJUST_STACK_TO_TOP:
// While this can be considered to be a vararg consumer, it is already handled in fastcall instructions
break;
case IrCmd::GET_TYPEOF:
visitor.use(inst.a);
break;
case IrCmd::FINDUPVAL:
visitor.use(inst.a);
break;
default:
// All instructions which reference registers have to be handled explicitly
LUAU_ASSERT(inst.a.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.b.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.e.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.f.kind != IrOpKind::VmReg);
break;
}
}
template<typename T>
static void visitVmRegDefsUses(T& visitor, IrFunction& function, const IrBlock& block)
{
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
IrInst& inst = function.instructions[instIdx];
visitVmRegDefsUses(visitor, function, inst);
}
}
} // namespace CodeGen
} // namespace Luau