Fix 128-bit ops

This commit is contained in:
Liam 2022-11-14 16:46:56 -05:00
parent ef2851d595
commit 57871c5159
5 changed files with 161 additions and 17 deletions

View file

@ -96,6 +96,123 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const
return target;
}
/* =========================== 128-bit versions =========================== */
static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);
oaknut::Label l_addr, l_this;
void* target = code.ptr<void*>();
ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector));
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.STP(X0, X1, SP);
code.LDR(Q0, SP);
ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector));
code.RET();
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> Vector {
return conf.global_monitor->ReadAndMark<Vector>(conf.processor_id, vaddr, [&]() -> Vector {
return conf.callbacks->MemoryRead128(vaddr);
});
};
void* target = code.ptr<void*>();
ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector));
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.STP(X0, X1, SP);
code.LDR(Q0, SP);
ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector));
code.RET();
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);
oaknut::Label l_addr, l_this;
void* target = code.ptr<void*>();
ABI_PushRegisters(code, 0, sizeof(Vector));
code.STR(Q0, SP);
code.LDP(X2, X3, SP);
ABI_PopRegisters(code, 0, sizeof(Vector));
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, Vector value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
[&](Vector expected) -> bool {
return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
})
? 0
: 1;
};
void* target = code.ptr<void*>();
ABI_PushRegisters(code, 0, sizeof(Vector));
code.STR(Q0, SP);
code.LDP(X2, X3, SP);
ABI_PopRegisters(code, 0, sizeof(Vector));
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
: conf(conf)
, mem(conf.code_cache_size)
@ -161,22 +278,22 @@ void A64AddressSpace::EmitPrelude() {
prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
prelude_info.read_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead128>(code, conf.callbacks);
prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks);
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
prelude_info.exclusive_read_memory_128 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead128, Vector>(code, conf);
prelude_info.exclusive_read_memory_128 = EmitExclusiveRead128CallTrampoline(code, conf);
prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
prelude_info.write_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite128>(code, conf.callbacks);
prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks);
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
prelude_info.exclusive_write_memory_128 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive128, Vector>(code, conf);
prelude_info.exclusive_write_memory_128 = EmitExclusiveWrite128CallTrampoline(code, conf);
prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);

View file

@ -55,13 +55,15 @@ static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) {
};
}
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \
for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \
code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \
} \
if (frame_info.TYPE##s.size() % 2 == 1) { \
const size_t i = frame_info.TYPE##s.size() - 1; \
code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \
if (frame_info.TYPE##s.size() > 0) { \
for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \
code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \
} \
if (frame_info.TYPE##s.size() % 2 == 1) { \
const size_t i = frame_info.TYPE##s.size() - 1; \
code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \
} \
}
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {

View file

@ -465,7 +465,7 @@ void EmitIR<IR::Opcode::A64SetTPIDR>(oaknut::CodeGenerator& code, EmitContext& c
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
RegAlloc::Realize(Xvalue);
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidrro_el0));
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
code.STR(Xvalue, Xscratch0);
}

View file

@ -1353,9 +1353,13 @@ void EmitIR<IR::Opcode::ZeroExtendWordToLong>(oaknut::CodeGenerator&, EmitContex
}
template<>
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
auto Qresult = ctx.reg_alloc.WriteQ(inst);
RegAlloc::Realize(Xvalue, Qresult);
code.FMOV(Qresult->toD(), Xvalue);
}
template<>

View file

@ -157,15 +157,36 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable
}
const std::array<std::optional<Argument::copyable_reference>, 4> args{arg0, arg1, arg2, arg3};
// AAPCS64 Next General-purpose Register Number
int ngrn = 0;
// AAPCS64 Next SIMD and Floating-point Register Number
int nsrn = 0;
for (int i = 0; i < 4; i++) {
if (args[i]) {
ASSERT(gprs[i].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::XReg{i});
if (args[i]->get().GetType() == IR::Type::U128) {
ASSERT(fprs[nsrn].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::QReg{nsrn});
nsrn++;
} else {
ASSERT(gprs[ngrn].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::XReg{ngrn});
ngrn++;
}
} else {
// Gaps are assumed to be in general-purpose registers
// TODO: should there be a separate list passed for FPRs instead?
ngrn++;
}
}
if (result) {
DefineAsRegister(result, X0);
if (result->GetType() == IR::Type::U128) {
DefineAsRegister(result, Q0);
} else {
DefineAsRegister(result, X0);
}
}
}