mirror of
https://github.com/luau-lang/luau.git
synced 2024-11-15 14:25:44 +08:00
c5f4d973d7
This change replaces scalar versions of vector opcodes for A64 with actual vector instructions. We take the approach similar to X64: patch last component with zero, perform the math, patch last component with type tag. I'm hoping that in the future the type tag will be placed separately (separate IR opcode?) because right now chains of math operations result in excessive type tag operations. To patch the type tag without always keeping a mask in a register, ins.4s instructions can be used; unfortunately it's only capable of patching a register in-place, so we need an extra register copy in case it's not last-use. Usually it's last-use so the patch is free; probably with IR rework mentioned above all of this can be improved (e.g. load-with-patch will never need to copy). ~It's not 100% clear if we *have* to patch type tag: Apple does preserve denormals but we'd need to benchmark this to see if there's an actual performance impact. But for now we're playing it safe.~ This was tested by running the conformance tests, and new opcode implementations were checked by comparing the result with https://armconverter.com/. Performance testing is complicated by the fact that OSS Luau doesn't support vector constructor out of the box, and other limitations of codegen. I've hacked vector constructor/type into REPL and confirmed that on a test that calls this function in a loop (not inlined): ``` function fma(a: vector, b: vector, c: vector) return a * b + c end ``` ... this PR improves performance by ~6% (note that probably most of the overhead here is the call dispatch; I didn't want to brave testing a more complex expression). The assembly for an individual operation changes as follows: Before: ``` # %14 = MUL_VEC %12, %13 ; useCount: 2, lastUse: %22 dup s29,v31.s[0] dup s28,v30.s[0] fmul s29,s29,s28 ins v31.s[0],v29.s[0] dup s29,v31.s[1] dup s28,v30.s[1] fmul s29,s29,s28 ins v31.s[1],v29.s[0] dup s29,v31.s[2] dup s28,v30.s[2] fmul s29,s29,s28 ins v31.s[2],v29.s[0] ``` After: ``` # %14 = MUL_VEC %12, %13 ; useCount: 2, lastUse: %22 ins v31.s[3],w31 ins v30.s[3],w31 fmul v31.4s,v31.4s,v30.4s movz w17,#4 ins v31.s[3],w17 ``` **edit** final form (see comments): ``` # %14 = MUL_VEC %12, %13 ; useCount: 2, lastUse: %22 fmul v31.4s,v31.4s,v30.4s movz w17,#4 ins v31.s[3],w17 ```
617 lines
19 KiB
C++
617 lines
19 KiB
C++
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
#include "Luau/AssemblyBuilderA64.h"
|
|
#include "Luau/StringUtils.h"
|
|
|
|
#include "doctest.h"
|
|
|
|
#include <string.h>
|
|
|
|
using namespace Luau::CodeGen;
|
|
using namespace Luau::CodeGen::A64;
|
|
|
|
static std::string bytecodeAsArray(const std::vector<uint8_t>& bytecode)
|
|
{
|
|
std::string result = "{";
|
|
|
|
for (size_t i = 0; i < bytecode.size(); i++)
|
|
Luau::formatAppend(result, "%s0x%02x", i == 0 ? "" : ", ", bytecode[i]);
|
|
|
|
return result.append("}");
|
|
}
|
|
|
|
static std::string bytecodeAsArray(const std::vector<uint32_t>& code)
|
|
{
|
|
std::string result = "{";
|
|
|
|
for (size_t i = 0; i < code.size(); i++)
|
|
Luau::formatAppend(result, "%s0x%08x", i == 0 ? "" : ", ", code[i]);
|
|
|
|
return result.append("}");
|
|
}
|
|
|
|
class AssemblyBuilderA64Fixture
|
|
{
|
|
public:
|
|
bool check(void (*f)(AssemblyBuilderA64& build), std::vector<uint32_t> code, std::vector<uint8_t> data = {}, unsigned int features = 0)
|
|
{
|
|
AssemblyBuilderA64 build(/* logText= */ false, features);
|
|
|
|
f(build);
|
|
|
|
build.finalize();
|
|
|
|
if (build.code != code)
|
|
{
|
|
printf("Expected code: %s\nReceived code: %s\n", bytecodeAsArray(code).c_str(), bytecodeAsArray(build.code).c_str());
|
|
return false;
|
|
}
|
|
|
|
if (build.data != data)
|
|
{
|
|
printf("Expected data: %s\nReceived data: %s\n", bytecodeAsArray(data).c_str(), bytecodeAsArray(build.data).c_str());
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
};
|
|
|
|
// armconverter.com can be used to validate instruction sequences
|
|
TEST_SUITE_BEGIN("A64Assembly");
|
|
|
|
#define SINGLE_COMPARE(inst, ...) \
|
|
CHECK(check( \
|
|
[](AssemblyBuilderA64& build) { \
|
|
build.inst; \
|
|
}, \
|
|
{__VA_ARGS__}))
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Unary")
|
|
{
|
|
SINGLE_COMPARE(neg(x0, x1), 0xCB0103E0);
|
|
SINGLE_COMPARE(neg(w0, w1), 0x4B0103E0);
|
|
SINGLE_COMPARE(mvn_(x0, x1), 0xAA2103E0);
|
|
|
|
SINGLE_COMPARE(clz(x0, x1), 0xDAC01020);
|
|
SINGLE_COMPARE(clz(w0, w1), 0x5AC01020);
|
|
SINGLE_COMPARE(rbit(x0, x1), 0xDAC00020);
|
|
SINGLE_COMPARE(rbit(w0, w1), 0x5AC00020);
|
|
SINGLE_COMPARE(rev(w0, w1), 0x5AC00820);
|
|
SINGLE_COMPARE(rev(x0, x1), 0xDAC00C20);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Binary")
|
|
{
|
|
// reg, reg
|
|
SINGLE_COMPARE(add(x0, x1, x2), 0x8B020020);
|
|
SINGLE_COMPARE(add(w0, w1, w2), 0x0B020020);
|
|
SINGLE_COMPARE(add(x0, x1, x2, 7), 0x8B021C20);
|
|
SINGLE_COMPARE(add(x0, x1, x2, -7), 0x8B421C20);
|
|
SINGLE_COMPARE(sub(x0, x1, x2), 0xCB020020);
|
|
SINGLE_COMPARE(and_(x0, x1, x2), 0x8A020020);
|
|
SINGLE_COMPARE(and_(x0, x1, x2, 7), 0x8A021C20);
|
|
SINGLE_COMPARE(and_(x0, x1, x2, -7), 0x8A421C20);
|
|
SINGLE_COMPARE(bic(x0, x1, x2), 0x8A220020);
|
|
SINGLE_COMPARE(orr(x0, x1, x2), 0xAA020020);
|
|
SINGLE_COMPARE(eor(x0, x1, x2), 0xCA020020);
|
|
SINGLE_COMPARE(lsl(x0, x1, x2), 0x9AC22020);
|
|
SINGLE_COMPARE(lsl(w0, w1, w2), 0x1AC22020);
|
|
SINGLE_COMPARE(lsr(x0, x1, x2), 0x9AC22420);
|
|
SINGLE_COMPARE(asr(x0, x1, x2), 0x9AC22820);
|
|
SINGLE_COMPARE(ror(x0, x1, x2), 0x9AC22C20);
|
|
SINGLE_COMPARE(cmp(x0, x1), 0xEB01001F);
|
|
SINGLE_COMPARE(tst(x0, x1), 0xEA01001F);
|
|
|
|
// reg, imm
|
|
SINGLE_COMPARE(add(x3, x7, 78), 0x910138E3);
|
|
SINGLE_COMPARE(add(w3, w7, 78), 0x110138E3);
|
|
SINGLE_COMPARE(sub(w3, w7, 78), 0x510138E3);
|
|
SINGLE_COMPARE(cmp(w0, 42), 0x7100A81F);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "BinaryExtended")
|
|
{
|
|
// reg, reg
|
|
SINGLE_COMPARE(add(x0, x1, w2, 3), 0x8B224C20);
|
|
SINGLE_COMPARE(sub(x0, x1, w2, 3), 0xCB224C20);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "BinaryImm")
|
|
{
|
|
// instructions
|
|
SINGLE_COMPARE(and_(w1, w2, 1), 0x12000041);
|
|
SINGLE_COMPARE(orr(w1, w2, 1), 0x32000041);
|
|
SINGLE_COMPARE(eor(w1, w2, 1), 0x52000041);
|
|
SINGLE_COMPARE(tst(w1, 1), 0x7200003f);
|
|
|
|
// various mask forms
|
|
SINGLE_COMPARE(and_(w0, w0, 1), 0x12000000);
|
|
SINGLE_COMPARE(and_(w0, w0, 3), 0x12000400);
|
|
SINGLE_COMPARE(and_(w0, w0, 7), 0x12000800);
|
|
SINGLE_COMPARE(and_(w0, w0, 2147483647), 0x12007800);
|
|
SINGLE_COMPARE(and_(w0, w0, 6), 0x121F0400);
|
|
SINGLE_COMPARE(and_(w0, w0, 12), 0x121E0400);
|
|
SINGLE_COMPARE(and_(w0, w0, 2147483648), 0x12010000);
|
|
|
|
// shifts
|
|
SINGLE_COMPARE(lsl(w1, w2, 1), 0x531F7841);
|
|
SINGLE_COMPARE(lsl(x1, x2, 1), 0xD37FF841);
|
|
SINGLE_COMPARE(lsr(w1, w2, 1), 0x53017C41);
|
|
SINGLE_COMPARE(lsr(x1, x2, 1), 0xD341FC41);
|
|
SINGLE_COMPARE(asr(w1, w2, 1), 0x13017C41);
|
|
SINGLE_COMPARE(asr(x1, x2, 1), 0x9341FC41);
|
|
SINGLE_COMPARE(ror(w1, w2, 1), 0x13820441);
|
|
SINGLE_COMPARE(ror(x1, x2, 1), 0x93C20441);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Bitfield")
|
|
{
|
|
SINGLE_COMPARE(ubfiz(x1, x2, 37, 5), 0xD35B1041);
|
|
SINGLE_COMPARE(ubfx(x1, x2, 37, 5), 0xD365A441);
|
|
SINGLE_COMPARE(sbfiz(x1, x2, 37, 5), 0x935B1041);
|
|
SINGLE_COMPARE(sbfx(x1, x2, 37, 5), 0x9365A441);
|
|
|
|
SINGLE_COMPARE(ubfiz(w1, w2, 17, 5), 0x530F1041);
|
|
SINGLE_COMPARE(ubfx(w1, w2, 17, 5), 0x53115441);
|
|
SINGLE_COMPARE(sbfiz(w1, w2, 17, 5), 0x130F1041);
|
|
SINGLE_COMPARE(sbfx(w1, w2, 17, 5), 0x13115441);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Loads")
|
|
{
|
|
// address forms
|
|
SINGLE_COMPARE(ldr(x0, x1), 0xF9400020);
|
|
SINGLE_COMPARE(ldr(x0, mem(x1, 8)), 0xF9400420);
|
|
SINGLE_COMPARE(ldr(x0, mem(x1, x7)), 0xF8676820);
|
|
SINGLE_COMPARE(ldr(x0, mem(x1, -7)), 0xF85F9020);
|
|
|
|
// load sizes
|
|
SINGLE_COMPARE(ldr(x0, x1), 0xF9400020);
|
|
SINGLE_COMPARE(ldr(w0, x1), 0xB9400020);
|
|
SINGLE_COMPARE(ldrb(w0, x1), 0x39400020);
|
|
SINGLE_COMPARE(ldrh(w0, x1), 0x79400020);
|
|
SINGLE_COMPARE(ldrsb(x0, x1), 0x39800020);
|
|
SINGLE_COMPARE(ldrsb(w0, x1), 0x39C00020);
|
|
SINGLE_COMPARE(ldrsh(x0, x1), 0x79800020);
|
|
SINGLE_COMPARE(ldrsh(w0, x1), 0x79C00020);
|
|
SINGLE_COMPARE(ldrsw(x0, x1), 0xB9800020);
|
|
|
|
// load sizes x offset scaling
|
|
SINGLE_COMPARE(ldr(x0, mem(x1, 8)), 0xF9400420);
|
|
SINGLE_COMPARE(ldr(w0, mem(x1, 8)), 0xB9400820);
|
|
SINGLE_COMPARE(ldrb(w0, mem(x1, 8)), 0x39402020);
|
|
SINGLE_COMPARE(ldrh(w0, mem(x1, 8)), 0x79401020);
|
|
SINGLE_COMPARE(ldrsb(w0, mem(x1, 8)), 0x39C02020);
|
|
SINGLE_COMPARE(ldrsh(w0, mem(x1, 8)), 0x79C01020);
|
|
|
|
// paired loads
|
|
SINGLE_COMPARE(ldp(x0, x1, mem(x2, 8)), 0xA9408440);
|
|
SINGLE_COMPARE(ldp(w0, w1, mem(x2, -8)), 0x297F0440);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Stores")
|
|
{
|
|
// address forms
|
|
SINGLE_COMPARE(str(x0, x1), 0xF9000020);
|
|
SINGLE_COMPARE(str(x0, mem(x1, 8)), 0xF9000420);
|
|
SINGLE_COMPARE(str(x0, mem(x1, x7)), 0xF8276820);
|
|
SINGLE_COMPARE(strh(w0, mem(x1, -7)), 0x781F9020);
|
|
|
|
// store sizes
|
|
SINGLE_COMPARE(str(x0, x1), 0xF9000020);
|
|
SINGLE_COMPARE(str(w0, x1), 0xB9000020);
|
|
SINGLE_COMPARE(strb(w0, x1), 0x39000020);
|
|
SINGLE_COMPARE(strh(w0, x1), 0x79000020);
|
|
|
|
// store sizes x offset scaling
|
|
SINGLE_COMPARE(str(x0, mem(x1, 8)), 0xF9000420);
|
|
SINGLE_COMPARE(str(w0, mem(x1, 8)), 0xB9000820);
|
|
SINGLE_COMPARE(strb(w0, mem(x1, 8)), 0x39002020);
|
|
SINGLE_COMPARE(strh(w0, mem(x1, 8)), 0x79001020);
|
|
|
|
// paired stores
|
|
SINGLE_COMPARE(stp(x0, x1, mem(x2, 8)), 0xA9008440);
|
|
SINGLE_COMPARE(stp(w0, w1, mem(x2, -8)), 0x293F0440);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Moves")
|
|
{
|
|
SINGLE_COMPARE(mov(x0, x1), 0xAA0103E0);
|
|
SINGLE_COMPARE(mov(w0, w1), 0x2A0103E0);
|
|
SINGLE_COMPARE(mov(q0, q1), 0x4EA11C20);
|
|
|
|
SINGLE_COMPARE(movz(x0, 42), 0xD2800540);
|
|
SINGLE_COMPARE(movz(w0, 42), 0x52800540);
|
|
SINGLE_COMPARE(movn(x0, 42), 0x92800540);
|
|
SINGLE_COMPARE(movn(w0, 42), 0x12800540);
|
|
SINGLE_COMPARE(movk(x0, 42, 16), 0xF2A00540);
|
|
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
build.mov(x0, 42);
|
|
},
|
|
{0xD2800540}));
|
|
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
build.mov(x0, 424242);
|
|
},
|
|
{0xD28F2640, 0xF2A000C0}));
|
|
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
build.mov(x0, -42);
|
|
},
|
|
{0x92800520}));
|
|
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
build.mov(x0, -424242);
|
|
},
|
|
{0x928F2620, 0xF2BFFF20}));
|
|
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
build.mov(x0, -65536);
|
|
},
|
|
{0x929FFFE0}));
|
|
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
build.mov(x0, -65537);
|
|
},
|
|
{0x92800000, 0xF2BFFFC0}));
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "ControlFlow")
|
|
{
|
|
// Jump back
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
Label start = build.setLabel();
|
|
build.mov(x0, x1);
|
|
build.b(ConditionA64::Equal, start);
|
|
},
|
|
{0xAA0103E0, 0x54FFFFE0}));
|
|
|
|
// Jump forward
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
Label skip;
|
|
build.b(ConditionA64::Equal, skip);
|
|
build.mov(x0, x1);
|
|
build.setLabel(skip);
|
|
},
|
|
{0x54000040, 0xAA0103E0}));
|
|
|
|
// Jumps
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
Label skip;
|
|
build.b(ConditionA64::Equal, skip);
|
|
build.cbz(x0, skip);
|
|
build.cbnz(x0, skip);
|
|
build.tbz(x0, 5, skip);
|
|
build.tbnz(x0, 5, skip);
|
|
build.setLabel(skip);
|
|
build.b(skip);
|
|
build.bl(skip);
|
|
},
|
|
{0x540000A0, 0xB4000080, 0xB5000060, 0x36280040, 0x37280020, 0x14000000, 0x97ffffff}));
|
|
|
|
// Basic control flow
|
|
SINGLE_COMPARE(br(x0), 0xD61F0000);
|
|
SINGLE_COMPARE(blr(x0), 0xD63F0000);
|
|
SINGLE_COMPARE(ret(), 0xD65F03C0);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "StackOps")
|
|
{
|
|
SINGLE_COMPARE(mov(x0, sp), 0x910003E0);
|
|
SINGLE_COMPARE(mov(sp, x0), 0x9100001F);
|
|
|
|
SINGLE_COMPARE(add(sp, sp, 4), 0x910013FF);
|
|
SINGLE_COMPARE(sub(sp, sp, 4), 0xD10013FF);
|
|
|
|
SINGLE_COMPARE(add(x0, sp, 4), 0x910013E0);
|
|
SINGLE_COMPARE(sub(sp, x0, 4), 0xD100101F);
|
|
|
|
SINGLE_COMPARE(ldr(x0, mem(sp, 8)), 0xF94007E0);
|
|
SINGLE_COMPARE(str(x0, mem(sp, 8)), 0xF90007E0);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Constants")
|
|
{
|
|
// clang-format off
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
char arr[12] = "hello world";
|
|
build.adr(x0, arr, 12);
|
|
build.adr(x0, uint64_t(0x1234567887654321));
|
|
build.adr(x0, 1.0);
|
|
},
|
|
{
|
|
0x10ffffa0, 0x10ffff20, 0x10fffec0
|
|
},
|
|
{
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f,
|
|
0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
|
|
0x00, 0x00, 0x00, 0x00, // 4b padding to align double
|
|
'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0,
|
|
}));
|
|
// clang-format on
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "AddressOfLabel")
|
|
{
|
|
// clang-format off
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
Label label;
|
|
build.adr(x0, label);
|
|
build.add(x0, x0, x0);
|
|
build.setLabel(label);
|
|
},
|
|
{
|
|
0x10000040, 0x8b000000,
|
|
}));
|
|
// clang-format on
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPBasic")
|
|
{
|
|
SINGLE_COMPARE(fmov(d0, d1), 0x1E604020);
|
|
SINGLE_COMPARE(fmov(d0, x1), 0x9E670020);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPMath")
|
|
{
|
|
SINGLE_COMPARE(fabs(d1, d2), 0x1E60C041);
|
|
SINGLE_COMPARE(fadd(d1, d2, d3), 0x1E632841);
|
|
SINGLE_COMPARE(fadd(s29, s29, s28), 0x1E3C2BBD);
|
|
SINGLE_COMPARE(fdiv(d1, d2, d3), 0x1E631841);
|
|
SINGLE_COMPARE(fdiv(s29, s29, s28), 0x1E3C1BBD);
|
|
SINGLE_COMPARE(fmul(d1, d2, d3), 0x1E630841);
|
|
SINGLE_COMPARE(fmul(s29, s29, s28), 0x1E3C0BBD);
|
|
SINGLE_COMPARE(fneg(d1, d2), 0x1E614041);
|
|
SINGLE_COMPARE(fneg(s30, s30), 0x1E2143DE);
|
|
SINGLE_COMPARE(fsqrt(d1, d2), 0x1E61C041);
|
|
SINGLE_COMPARE(fsub(d1, d2, d3), 0x1E633841);
|
|
SINGLE_COMPARE(fsub(s29, s29, s28), 0x1E3C3BBD);
|
|
|
|
SINGLE_COMPARE(frinta(d1, d2), 0x1E664041);
|
|
SINGLE_COMPARE(frintm(d1, d2), 0x1E654041);
|
|
SINGLE_COMPARE(frintp(d1, d2), 0x1E64C041);
|
|
|
|
SINGLE_COMPARE(fcvt(s1, d2), 0x1E624041);
|
|
SINGLE_COMPARE(fcvt(d1, s2), 0x1E22C041);
|
|
|
|
SINGLE_COMPARE(fcvtzs(w1, d2), 0x1E780041);
|
|
SINGLE_COMPARE(fcvtzs(x1, d2), 0x9E780041);
|
|
SINGLE_COMPARE(fcvtzu(w1, d2), 0x1E790041);
|
|
SINGLE_COMPARE(fcvtzu(x1, d2), 0x9E790041);
|
|
|
|
SINGLE_COMPARE(scvtf(d1, w2), 0x1E620041);
|
|
SINGLE_COMPARE(scvtf(d1, x2), 0x9E620041);
|
|
SINGLE_COMPARE(ucvtf(d1, w2), 0x1E630041);
|
|
SINGLE_COMPARE(ucvtf(d1, x2), 0x9E630041);
|
|
|
|
CHECK(check(
|
|
[](AssemblyBuilderA64& build) {
|
|
build.fjcvtzs(w1, d2);
|
|
},
|
|
{0x1E7E0041}, {}, A64::Feature_JSCVT));
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPLoadStore")
|
|
{
|
|
// address forms
|
|
SINGLE_COMPARE(ldr(d0, x1), 0xFD400020);
|
|
SINGLE_COMPARE(ldr(d0, mem(x1, 8)), 0xFD400420);
|
|
SINGLE_COMPARE(ldr(d0, mem(x1, x7)), 0xFC676820);
|
|
SINGLE_COMPARE(ldr(d0, mem(x1, -7)), 0xFC5F9020);
|
|
SINGLE_COMPARE(str(d0, x1), 0xFD000020);
|
|
SINGLE_COMPARE(str(d0, mem(x1, 8)), 0xFD000420);
|
|
SINGLE_COMPARE(str(d0, mem(x1, x7)), 0xFC276820);
|
|
SINGLE_COMPARE(str(d0, mem(x1, -7)), 0xFC1F9020);
|
|
|
|
// load/store sizes
|
|
SINGLE_COMPARE(ldr(s0, x1), 0xBD400020);
|
|
SINGLE_COMPARE(ldr(d0, x1), 0xFD400020);
|
|
SINGLE_COMPARE(ldr(q0, x1), 0x3DC00020);
|
|
SINGLE_COMPARE(str(s0, x1), 0xBD000020);
|
|
SINGLE_COMPARE(str(d0, x1), 0xFD000020);
|
|
SINGLE_COMPARE(str(q0, x1), 0x3D800020);
|
|
|
|
// load/store sizes x offset scaling
|
|
SINGLE_COMPARE(ldr(q0, mem(x1, 16)), 0x3DC00420);
|
|
SINGLE_COMPARE(ldr(d0, mem(x1, 16)), 0xFD400820);
|
|
SINGLE_COMPARE(ldr(s0, mem(x1, 16)), 0xBD401020);
|
|
SINGLE_COMPARE(str(q0, mem(x1, 16)), 0x3D800420);
|
|
SINGLE_COMPARE(str(d0, mem(x1, 16)), 0xFD000820);
|
|
SINGLE_COMPARE(str(s0, mem(x1, 16)), 0xBD001020);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPInsertExtract")
|
|
{
|
|
SINGLE_COMPARE(ins_4s(q29, w17, 3), 0x4E1C1E3D);
|
|
SINGLE_COMPARE(ins_4s(q31, 0, q29, 0), 0x6E0407BF);
|
|
SINGLE_COMPARE(dup_4s(s29, q31, 2), 0x5E1407FD);
|
|
SINGLE_COMPARE(dup_4s(q29, q30, 0), 0x4E0407DD);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPCompare")
|
|
{
|
|
SINGLE_COMPARE(fcmp(d0, d1), 0x1E612000);
|
|
SINGLE_COMPARE(fcmpz(d1), 0x1E602028);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPImm")
|
|
{
|
|
SINGLE_COMPARE(fmov(d0, 0), 0x2F00E400);
|
|
SINGLE_COMPARE(fmov(d0, 0.125), 0x1E681000);
|
|
SINGLE_COMPARE(fmov(d0, -0.125), 0x1E781000);
|
|
|
|
CHECK(!AssemblyBuilderA64::isFmovSupported(-0.0));
|
|
CHECK(!AssemblyBuilderA64::isFmovSupported(0.12389));
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "AddressOffsetSize")
|
|
{
|
|
SINGLE_COMPARE(ldr(w0, mem(x1, 16)), 0xB9401020);
|
|
SINGLE_COMPARE(ldr(x0, mem(x1, 16)), 0xF9400820);
|
|
SINGLE_COMPARE(ldr(d0, mem(x1, 16)), 0xFD400820);
|
|
SINGLE_COMPARE(ldr(q0, mem(x1, 16)), 0x3DC00420);
|
|
|
|
SINGLE_COMPARE(str(w0, mem(x1, 16)), 0xB9001020);
|
|
SINGLE_COMPARE(str(x0, mem(x1, 16)), 0xF9000820);
|
|
SINGLE_COMPARE(str(d0, mem(x1, 16)), 0xFD000820);
|
|
SINGLE_COMPARE(str(q0, mem(x1, 16)), 0x3D800420);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Conditionals")
|
|
{
|
|
SINGLE_COMPARE(csel(x0, x1, x2, ConditionA64::Equal), 0x9A820020);
|
|
SINGLE_COMPARE(csel(w0, w1, w2, ConditionA64::Equal), 0x1A820020);
|
|
SINGLE_COMPARE(fcsel(d0, d1, d2, ConditionA64::Equal), 0x1E620C20);
|
|
|
|
SINGLE_COMPARE(cset(x1, ConditionA64::Less), 0x9A9FA7E1);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Undefined")
|
|
{
|
|
SINGLE_COMPARE(udf(), 0x00000000);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "PrePostIndexing")
|
|
{
|
|
SINGLE_COMPARE(ldr(x0, mem(x1, 1)), 0xF8401020);
|
|
SINGLE_COMPARE(ldr(x0, mem(x1, 1, AddressKindA64::pre)), 0xF8401C20);
|
|
SINGLE_COMPARE(ldr(x0, mem(x1, 1, AddressKindA64::post)), 0xF8401420);
|
|
|
|
SINGLE_COMPARE(ldr(q0, mem(x1, 1)), 0x3CC01020);
|
|
SINGLE_COMPARE(ldr(q0, mem(x1, 1, AddressKindA64::pre)), 0x3CC01C20);
|
|
SINGLE_COMPARE(ldr(q0, mem(x1, 1, AddressKindA64::post)), 0x3CC01420);
|
|
|
|
SINGLE_COMPARE(str(x0, mem(x1, 1)), 0xF8001020);
|
|
SINGLE_COMPARE(str(x0, mem(x1, 1, AddressKindA64::pre)), 0xF8001C20);
|
|
SINGLE_COMPARE(str(x0, mem(x1, 1, AddressKindA64::post)), 0xF8001420);
|
|
|
|
SINGLE_COMPARE(str(q0, mem(x1, 1)), 0x3C801020);
|
|
SINGLE_COMPARE(str(q0, mem(x1, 1, AddressKindA64::pre)), 0x3C801C20);
|
|
SINGLE_COMPARE(str(q0, mem(x1, 1, AddressKindA64::post)), 0x3C801420);
|
|
}
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "SIMDMath")
|
|
{
|
|
SINGLE_COMPARE(fadd(q0, q1, q2), 0x4E22D420);
|
|
SINGLE_COMPARE(fsub(q0, q1, q2), 0x4EA2D420);
|
|
SINGLE_COMPARE(fmul(q0, q1, q2), 0x6E22DC20);
|
|
SINGLE_COMPARE(fdiv(q0, q1, q2), 0x6E22FC20);
|
|
SINGLE_COMPARE(fneg(q0, q1), 0x6EA0F820);
|
|
}
|
|
|
|
TEST_CASE("LogTest")
|
|
{
|
|
AssemblyBuilderA64 build(/* logText= */ true);
|
|
|
|
build.add(sp, sp, 4);
|
|
build.add(w0, w1, w2);
|
|
build.add(x0, x1, x2, 2);
|
|
build.add(x0, x1, x2, -2);
|
|
build.add(w7, w8, 5);
|
|
build.add(x7, x8, 5);
|
|
build.ldr(x7, x8);
|
|
build.ldr(x7, mem(x8, 8));
|
|
build.ldr(x7, mem(x8, x9));
|
|
build.mov(x1, x2);
|
|
build.movk(x1, 42, 16);
|
|
build.cmp(x1, x2);
|
|
build.blr(x0);
|
|
|
|
Label l;
|
|
build.b(ConditionA64::Plus, l);
|
|
build.cbz(x7, l);
|
|
|
|
build.ldp(x0, x1, mem(x8, 8));
|
|
build.adr(x0, l);
|
|
|
|
build.fabs(d1, d2);
|
|
build.ldr(q1, x2);
|
|
|
|
build.csel(x0, x1, x2, ConditionA64::Equal);
|
|
build.cset(x0, ConditionA64::Equal);
|
|
|
|
build.fcmp(d0, d1);
|
|
build.fcmpz(d0);
|
|
|
|
build.fmov(d0, 0.25);
|
|
build.tbz(x0, 5, l);
|
|
|
|
build.fcvt(s1, d2);
|
|
|
|
build.ubfx(x1, x2, 37, 5);
|
|
|
|
build.ldr(x0, mem(x1, 1));
|
|
build.ldr(x0, mem(x1, 1, AddressKindA64::pre));
|
|
build.ldr(x0, mem(x1, 1, AddressKindA64::post));
|
|
|
|
build.add(x1, x2, w3, 3);
|
|
|
|
build.ins_4s(q29, w17, 3);
|
|
build.ins_4s(q31, 1, q29, 2);
|
|
build.dup_4s(s29, q31, 2);
|
|
build.dup_4s(q29, q30, 0);
|
|
build.fmul(q0, q1, q2);
|
|
|
|
build.setLabel(l);
|
|
build.ret();
|
|
|
|
build.finalize();
|
|
|
|
std::string expected = R"(
|
|
add sp,sp,#4
|
|
add w0,w1,w2
|
|
add x0,x1,x2 LSL #2
|
|
add x0,x1,x2 LSR #2
|
|
add w7,w8,#5
|
|
add x7,x8,#5
|
|
ldr x7,[x8]
|
|
ldr x7,[x8,#8]
|
|
ldr x7,[x8,x9]
|
|
mov x1,x2
|
|
movk x1,#42 LSL #16
|
|
cmp x1,x2
|
|
blr x0
|
|
b.pl .L1
|
|
cbz x7,.L1
|
|
ldp x0,x1,[x8,#8]
|
|
adr x0,.L1
|
|
fabs d1,d2
|
|
ldr q1,[x2]
|
|
csel x0,x1,x2,eq
|
|
cset x0,eq
|
|
fcmp d0,d1
|
|
fcmp d0,#0
|
|
fmov d0,#0.25
|
|
tbz x0,#5,.L1
|
|
fcvt s1,d2
|
|
ubfx x1,x2,#3705
|
|
ldr x0,[x1,#1]
|
|
ldr x0,[x1,#1]!
|
|
ldr x0,[x1]!,#1
|
|
add x1,x2,w3 UXTW #3
|
|
ins v29.s[3],w17
|
|
ins v31.s[1],v29.s[2]
|
|
dup s29,v31.s[2]
|
|
dup v29.4s,v30.s[0]
|
|
fmul v0.4s,v1.4s,v2.4s
|
|
.L1:
|
|
ret
|
|
)";
|
|
|
|
CHECK("\n" + build.text == expected);
|
|
}
|
|
|
|
TEST_SUITE_END();
|