luau/tests/conformance/vector.lua
Arseny Kapoulkine 9aa82c6fb9
CodeGen: Improve lowering of NUM_TO_VEC on A64 for constants (#1194)
When the input is a constant, we use a fairly inefficient sequence of
fmov+fcvt+dup or, when the double isn't encodable in fmov,
adr+ldr+fcvt+dup.

Instead, we can use the same lowering as X64 when the input is a
constant, and load the vector from memory. However, if the constant is
encodable via fmov, we can use a vector fmov instead (which is just one
instruction and doesn't need constant space).

Fortunately the bit encoding of fmov for 32-bit floating point numbers
matches that of 64-bit: the decoding algorithm is a little different
because it expands into a larger exponent, but the values are
compatible, so if a double can be encoded into a scalar fmov with a
given abcdefgh pattern, the same pattern should encode the same float;
due to the very limited number of mantissa and exponent bits, all values
that are encodable are also exact in both 32-bit and 64-bit floats.

This strategy is roughly the same as what gcc uses. For complex vectors, we
previously used 4 instructions and 8 bytes of constant storage, and now
we use 2 instructions and 16 bytes of constant storage, so the memory
footprint is the same; for simple vectors we just need 1 instruction (4
bytes).

clang lowers vector constants a little differently, opting to synthesize
a 64-bit integer using 4 instructions (mov/movk) and then move it to the
vector register - this requires 5 instructions and 20 bytes, vs ours/gcc
2 instructions and 8+16=24 bytes. I tried a simpler version of this that
would be more compact - synthesize a 32-bit integer constant with
mov+movk, and move it to vector register via dup.4s - but this was a
little slower on M2, so for now we prefer the slightly larger version as
it's not a regression vs current implementation.

On the vector approximation benchmark we get:

- Before this PR (flag=false): ~7.85 ns/op
- After this PR (flag=true): ~7.74 ns/op
- After this PR, with 0.125 instead of 0.123 in the benchmark code (to
use fmov): ~7.52 ns/op
- Not part of this PR, but the mov/dup strategy described above: ~8.00
ns/op
2024-03-13 12:56:11 -07:00

139 lines
4.5 KiB
Lua

-- This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
print('testing vectors')

-- Detect the vector width. Reading `.w` from a vector is attempted under
-- pcall: if the read succeeds the rest of the file uses 4-component
-- expectations, otherwise 3-component ones.
local function read_w()
    return vector(0, 0, 0).w
end

local vector_size = if pcall(read_w) then 4 else 3
-- equality
-- Expectations for the `==` / `~=` operators: vectors with identical
-- components are equal, a vector built with -0 equals one built with 0, and a
-- single differing component makes two vectors unequal.
assert(vector(1, 2, 3) == vector(1, 2, 3))
assert(vector(0, 1, 2) == vector(-0, 1, 2))
assert(vector(1, 2, 3) ~= vector(1, 2, 4))
-- rawequal
-- The same three cases, checked through rawequal instead of the operators.
assert(rawequal(vector(1, 2, 3), vector(1, 2, 3)))
assert(rawequal(vector(0, 1, 2), vector(-0, 1, 2)))
assert(not rawequal(vector(1, 2, 3), vector(1, 2, 4)))
-- type & tostring
-- type() must report "vector"; tostring() is expected to list the components
-- separated by ", " (three or four of them, depending on vector_size).
assert(type(vector(1, 2, 3)) == "vector")

if vector_size == 4 then
    assert(tostring(vector(1, 2, 3, 4)) == "1, 2, 3, 4")
    assert(tostring(vector(-1, 2, 0.5, 0)) == "-1, 2, 0.5, 0")
else
    assert(tostring(vector(1, 2, 3)) == "1, 2, 3")
    assert(tostring(vector(-1, 2, 0.5)) == "-1, 2, 0.5")
end
-- basic table access
-- Vectors are used as table keys: an equal vector must find the stored value,
-- a vector with a different component must not.
local lookup = {}

lookup[vector(1, 2, 3)] = 42
assert(lookup[vector(1, 2, 3)] == 42)
assert(lookup[vector(1, 2, 4)] == nil)

-- negative zero should hash the same as zero
-- The zero key is absent at first; once stored, it must be reachable through
-- a key that has -0 in one component.
assert(lookup[vector(0, 0, 0)] == nil)
lookup[vector(0, 0, 0)] = "hello"
assert(lookup[vector(0, 0, 0)] == "hello")
assert(lookup[vector(0, -0, 0)] == "hello")
-- test arithmetic instructions

-- 1/8 obtained by dividing by a numeric string.
-- NOTE(review): presumably written this way so the value is not a compile-time
-- constant - confirm.
local val = 1/'8'

-- addition and subtraction
assert(vector(1, 2, 4) + vector(8, 16, 24) == vector(9, 18, 28))
assert(vector(1, 2, 4) - vector(8, 16, 24) == vector(-7, -14, -20))

-- multiplication: vector * vector, plus vector * scalar and scalar * vector
-- with the scalar given as a number literal, as an expression over `val`,
-- and as a numeric string
assert(vector(1, 2, 4) * vector(8, 16, 24) == vector(8, 32, 96))
assert(vector(1, 2, 4) * 8 == vector(8, 16, 32))
assert(vector(1, 2, 4) * (1 / val) == vector(8, 16, 32))
assert(8 * vector(8, 16, 24) == vector(64, 128, 192))
assert(vector(1, 2, 4) * '8' == vector(8, 16, 32))
assert('8' * vector(8, 16, 24) == vector(64, 128, 192))
assert(vector(1, 2, 4) * -0.125 == vector(-0.125, -0.25, -0.5))
assert(-0.125 * vector(1, 2, 4) == vector(-0.125, -0.25, -0.5))
assert(vector(1, 2, 4) * 100 == vector(100, 200, 400))
assert(100 * vector(1, 2, 4) == vector(100, 200, 400))

-- division with a vector divisor; expectations depend on the vector width
if vector_size == 4 then
    assert(vector(1, 2, 4, 8) / vector(8, 16, 24, 32) == vector(1/8, 2/16, 4/24, 8/32))
    assert(8 / vector(8, 16, 24, 32) == vector(1, 1/2, 1/3, 1/4))
    assert('8' / vector(8, 16, 24, 32) == vector(1, 1/2, 1/3, 1/4))
else
    -- NOTE(review): the divisor below is constructed with a fourth argument
    -- even though this is the 3-component branch; presumably intentional - confirm.
    assert(vector(1, 2, 4) / vector(8, 16, 24, 1) == vector(1/8, 2/16, 4/24))
    assert(8 / vector(8, 16, 24) == vector(1, 1/2, 1/3))
    assert('8' / vector(8, 16, 24) == vector(1, 1/2, 1/3))
end

-- division with a scalar divisor (literal, expression over `val`, numeric string)
assert(vector(1, 2, 4) / 8 == vector(1/8, 1/4, 1/2))
assert(vector(1, 2, 4) / (1 / val) == vector(1/8, 2/8, 4/8))
assert(vector(1, 2, 4) / '8' == vector(1/8, 1/4, 1/2))

-- unary minus
assert(-vector(1, 2, 4) == vector(-1, -2, -4))
-- test floor division
-- vector // scalar, with the scalar as a literal and as the `val` local
-- defined earlier in the file
assert(vector(1, 3, 5) // 2 == vector(0, 1, 2))
assert(vector(1, 3, 5) // val == vector(8, 24, 40))

-- scalar // vector and vector // vector; expectations depend on the vector width
if vector_size == 4 then
    assert(10 // vector(1, 2, 3, 4) == vector(10, 5, 3, 2))
    assert(vector(10, 9, 8, 7) // vector(1, 2, 3, 4) == vector(10, 4, 2, 1))
else
    assert(10 // vector(1, 2, 3) == vector(10, 5, 3))
    assert(vector(10, 9, 8) // vector(1, 2, 3) == vector(10, 4, 2))
end
-- test NaN comparison
-- A vector whose components are all 0/0 must not compare equal to itself.
local nan_vec = vector(0/0, 0/0, 0/0)
assert(nan_vec ~= nan_vec)
-- __index
-- Property-style access: `.Magnitude` is read as a field (sqrt(1 + 4 + 4) = 3),
-- and `Dot` is fetched by string key and then called as a plain function with
-- both operands passed explicitly; the vector(0, 0, 0) receiver is only used
-- for the lookup.
assert(vector(1, 2, 2).Magnitude == 3)
assert(vector(0, 0, 0)['Dot'](vector(1, 2, 4), vector(5, 6, 7)) == 45)
-- __namecall
-- Method-call syntax with an implicit receiver: 1*5 + 2*6 + 4*7 = 45.
assert(vector(1, 2, 4):Dot(vector(5, 6, 7)) == 45)
-- can't use vector with NaN components as table key
-- Each closure tries to store a value under a key that has 0/0 in one
-- component; pcall must report failure for every component position, both for
-- indexed assignment and for rawset.
assert(pcall(function()
    local tbl = {}
    tbl[vector(0/0, 2, 3)] = 1
end) == false)

assert(pcall(function()
    local tbl = {}
    tbl[vector(1, 0/0, 3)] = 1
end) == false)

assert(pcall(function()
    local tbl = {}
    tbl[vector(1, 2, 0/0)] = 1
end) == false)

assert(pcall(function()
    local tbl = {}
    rawset(tbl, vector(0/0, 2, 3), 1)
end) == false)
-- make sure we cover both builtin and C impl
-- A vector built from numeric strings must equal one built from numbers.
-- NOTE(review): the string arguments presumably force the non-builtin
-- constructor path - confirm against the host implementation.
assert(vector(1, 2, 4) == vector("1", "2", "4"))
-- validate component access (both cases)
-- Every component must be readable under both its lowercase and uppercase name.
assert(vector(1, 2, 3).x == 1)
assert(vector(1, 2, 3).X == 1)
assert(vector(1, 2, 3).y == 2)
assert(vector(1, 2, 3).Y == 2)
assert(vector(1, 2, 3).z == 3)
assert(vector(1, 2, 3).Z == 3)
-- additional checks for 4-component vectors
-- Only run when the width probe selected 4: the fourth component must be
-- readable as both `w` and `W`.
if vector_size == 4 then
    assert(vector(1, 2, 3, 4).w == 4)
    assert(vector(1, 2, 3, 4).W == 4)
end
-- negative zero should hash the same as zero
-- note: our earlier test only really checks the low hash bit, so in absence of perfect avalanche it's insufficient
do
    local large_table = {}

    -- fill the table with 2^14 distinct vector keys before probing the zero key
    for z = 1, 2^14 do
        large_table[vector(0, 0, z)] = true
    end

    large_table[vector(0, 0, 0)] = 42

    -- the stored value must be found with -0 in any single component
    assert(large_table[vector(0, 0, 0)] == 42)
    assert(large_table[vector(0, 0, -0)] == 42)
    assert(large_table[vector(0, -0, 0)] == 42)
    assert(large_table[vector(-0, 0, 0)] == 42)
end

-- result value handed back to whatever loaded this script
return 'OK'