Fix 128-bit ops
This commit is contained in:
parent
ef2851d595
commit
57871c5159
5 changed files with 161 additions and 17 deletions
|
@ -96,6 +96,123 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const
|
||||||
return target;
|
return target;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* =========================== 128-bit versions =========================== */
|
||||||
|
|
||||||
|
// Emits a trampoline that calls the user's MemoryRead128 callback and returns
// the 128-bit result in Q0.
//
// The callback is resolved once through Devirtualize; the resulting `this`
// pointer and function address are emitted as data (via dx, after align(8))
// behind the code and loaded PC-relative through labels.
//
// code:  code generator the trampoline is emitted into.
// this_: callbacks object whose MemoryRead128 is invoked.
// Returns the code pointer captured before the first instruction is emitted.
static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
    using namespace oaknut::util;

    // Registers 29 and 30 are saved around the call
    // (presumably FP and LR -- confirm against ABI_PushRegisters).
    constexpr auto saved_regs = (1ull << 29) | (1ull << 30);

    const auto callee = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);

    oaknut::Label this_literal;
    oaknut::Label fn_literal;

    void* const entry = code.ptr<void*>();

    // A frame of sizeof(Vector) bytes is requested; it is used below as
    // scratch space for moving the result from X0:X1 into Q0.
    ABI_PushRegisters(code, saved_regs, sizeof(Vector));

    code.LDR(X0, this_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BLR(Xscratch0);

    // Spill the two 64-bit result halves and reload them as one Q register.
    code.STP(X0, X1, SP);
    code.LDR(Q0, SP);

    ABI_PopRegisters(code, saved_regs, sizeof(Vector));
    code.RET();

    // Data referenced by the two LDR (literal) instructions above.
    code.align(8);
    code.l(this_literal);
    code.dx(callee.this_ptr);
    code.l(fn_literal);
    code.dx(callee.fn_ptr);

    return entry;
}
|
||||||
|
|
||||||
|
// Emits a trampoline for 128-bit exclusive reads.
//
// The trampoline calls a captureless helper that performs the read through
// conf.global_monitor->ReadAndMark<Vector>, using conf.callbacks->MemoryRead128
// as the underlying reader, and then moves the X0:X1 result into Q0.
//
// code: code generator the trampoline is emitted into.
// conf: user configuration; its address is embedded in the emitted data, so it
//       has to stay valid for as long as the trampoline may run.
// Returns the code pointer captured before the first instruction is emitted.
static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
    using namespace oaknut::util;

    // Registers 29 and 30 are saved around the call
    // (presumably FP and LR -- confirm against ABI_PushRegisters).
    constexpr auto saved_regs = (1ull << 29) | (1ull << 30);

    oaknut::Label this_literal;
    oaknut::Label fn_literal;

    // Captureless so that it can be converted to a plain function pointer.
    auto read_and_mark = [](const A64::UserConfig& cfg, A64::VAddr vaddr) -> Vector {
        auto read_memory = [&]() -> Vector {
            return cfg.callbacks->MemoryRead128(vaddr);
        };
        return cfg.global_monitor->ReadAndMark<Vector>(cfg.processor_id, vaddr, read_memory);
    };

    void* const entry = code.ptr<void*>();

    // The sizeof(Vector) frame is scratch space for the X0:X1 -> Q0 transfer.
    ABI_PushRegisters(code, saved_regs, sizeof(Vector));

    code.LDR(X0, this_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BLR(Xscratch0);

    // Spill the two 64-bit result halves and reload them as one Q register.
    code.STP(X0, X1, SP);
    code.LDR(Q0, SP);

    ABI_PopRegisters(code, saved_regs, sizeof(Vector));
    code.RET();

    // Data referenced by the two LDR (literal) instructions above.
    code.align(8);
    code.l(this_literal);
    code.dx(mcl::bit_cast<u64>(&conf));
    code.l(fn_literal);
    code.dx(mcl::bit_cast<u64>(Common::FptrCast(read_and_mark)));

    return entry;
}
|
||||||
|
|
||||||
|
// Emits a trampoline that forwards a 128-bit store to the user's
// MemoryWrite128 callback.
//
// The value arrives in Q0 and is copied into X2:X3 through a temporary stack
// slot before branching to the callback. X1 is not touched by the trampoline.
//
// code:  code generator the trampoline is emitted into.
// this_: callbacks object whose MemoryWrite128 is invoked.
// Returns the code pointer captured before the first instruction is emitted.
static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
    using namespace oaknut::util;

    const auto callee = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);

    oaknut::Label this_literal;
    oaknut::Label fn_literal;

    void* const entry = code.ptr<void*>();

    // No registers are saved here; the frame only serves as scratch space for
    // splitting Q0 into two 64-bit halves.
    ABI_PushRegisters(code, 0, sizeof(Vector));
    code.STR(Q0, SP);
    code.LDP(X2, X3, SP);
    ABI_PopRegisters(code, 0, sizeof(Vector));

    // Branch without link: the callback returns directly to our caller.
    code.LDR(X0, this_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BR(Xscratch0);

    // Data referenced by the two LDR (literal) instructions above.
    code.align(8);
    code.l(this_literal);
    code.dx(callee.this_ptr);
    code.l(fn_literal);
    code.dx(callee.fn_ptr);

    return entry;
}
|
||||||
|
|
||||||
|
// Emits a trampoline for 128-bit exclusive writes.
//
// The value arrives in Q0 and is copied into X2:X3 through a temporary stack
// slot. The trampoline then branches to a captureless helper that runs
// conf.global_monitor->DoExclusiveOperation<Vector> with
// conf.callbacks->MemoryWriteExclusive128 as the store operation. The helper
// yields 0 when the exclusive operation reports success and 1 otherwise.
//
// code: code generator the trampoline is emitted into.
// conf: user configuration; its address is embedded in the emitted data, so it
//       has to stay valid for as long as the trampoline may run.
// Returns the code pointer captured before the first instruction is emitted.
static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
    using namespace oaknut::util;

    oaknut::Label this_literal;
    oaknut::Label fn_literal;

    // Captureless so that it can be converted to a plain function pointer.
    auto exclusive_store = [](const A64::UserConfig& cfg, A64::VAddr vaddr, Vector value) -> u32 {
        const bool stored = cfg.global_monitor->DoExclusiveOperation<Vector>(
            cfg.processor_id, vaddr,
            [&](Vector expected) -> bool {
                return cfg.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
            });
        if (stored) {
            return 0;
        }
        return 1;
    };

    void* const entry = code.ptr<void*>();

    // No registers are saved here; the frame only serves as scratch space for
    // splitting Q0 into two 64-bit halves.
    ABI_PushRegisters(code, 0, sizeof(Vector));
    code.STR(Q0, SP);
    code.LDP(X2, X3, SP);
    ABI_PopRegisters(code, 0, sizeof(Vector));

    // Branch without link: the helper returns directly to our caller.
    code.LDR(X0, this_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BR(Xscratch0);

    // Data referenced by the two LDR (literal) instructions above.
    code.align(8);
    code.l(this_literal);
    code.dx(mcl::bit_cast<u64>(&conf));
    code.l(fn_literal);
    code.dx(mcl::bit_cast<u64>(Common::FptrCast(exclusive_store)));

    return entry;
}
|
||||||
|
|
||||||
A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
|
A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
|
||||||
: conf(conf)
|
: conf(conf)
|
||||||
, mem(conf.code_cache_size)
|
, mem(conf.code_cache_size)
|
||||||
|
@ -161,22 +278,22 @@ void A64AddressSpace::EmitPrelude() {
|
||||||
prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
|
prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
|
||||||
prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
|
prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
|
||||||
prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
|
prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
|
||||||
prelude_info.read_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead128>(code, conf.callbacks);
|
prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks);
|
||||||
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
|
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
|
||||||
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
|
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
|
||||||
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
|
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
|
||||||
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
|
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
|
||||||
prelude_info.exclusive_read_memory_128 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead128, Vector>(code, conf);
|
prelude_info.exclusive_read_memory_128 = EmitExclusiveRead128CallTrampoline(code, conf);
|
||||||
prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
|
prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
|
||||||
prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
|
prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
|
||||||
prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
|
prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
|
||||||
prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
|
prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
|
||||||
prelude_info.write_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite128>(code, conf.callbacks);
|
prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks);
|
||||||
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
|
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
|
||||||
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
|
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
|
||||||
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
|
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
|
||||||
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
|
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
|
||||||
prelude_info.exclusive_write_memory_128 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive128, Vector>(code, conf);
|
prelude_info.exclusive_write_memory_128 = EmitExclusiveWrite128CallTrampoline(code, conf);
|
||||||
prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
|
prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
|
||||||
prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
|
prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
|
||||||
prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
|
prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
|
||||||
|
|
|
@ -56,12 +56,14 @@ static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \
|
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \
|
||||||
|
if (frame_info.TYPE##s.size() > 0) { \
|
||||||
for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \
|
for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \
|
||||||
code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \
|
code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \
|
||||||
} \
|
} \
|
||||||
if (frame_info.TYPE##s.size() % 2 == 1) { \
|
if (frame_info.TYPE##s.size() % 2 == 1) { \
|
||||||
const size_t i = frame_info.TYPE##s.size() - 1; \
|
const size_t i = frame_info.TYPE##s.size() - 1; \
|
||||||
code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \
|
code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
|
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
|
||||||
|
|
|
@ -465,7 +465,7 @@ void EmitIR<IR::Opcode::A64SetTPIDR>(oaknut::CodeGenerator& code, EmitContext& c
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||||
RegAlloc::Realize(Xvalue);
|
RegAlloc::Realize(Xvalue);
|
||||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidrro_el0));
|
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
|
||||||
code.STR(Xvalue, Xscratch0);
|
code.STR(Xvalue, Xscratch0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1353,9 +1353,13 @@ void EmitIR<IR::Opcode::ZeroExtendWordToLong>(oaknut::CodeGenerator&, EmitContex
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
|
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||||
|
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||||
|
RegAlloc::Realize(Xvalue, Qresult);
|
||||||
|
|
||||||
|
code.FMOV(Qresult->toD(), Xvalue);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
|
|
@ -157,17 +157,38 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::array<std::optional<Argument::copyable_reference>, 4> args{arg0, arg1, arg2, arg3};
|
const std::array<std::optional<Argument::copyable_reference>, 4> args{arg0, arg1, arg2, arg3};
|
||||||
|
|
||||||
|
// AAPCS64 Next General-purpose Register Number
|
||||||
|
int ngrn = 0;
|
||||||
|
// AAPCS64 Next SIMD and Floating-point Register Number
|
||||||
|
int nsrn = 0;
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
if (args[i]) {
|
if (args[i]) {
|
||||||
ASSERT(gprs[i].IsCompletelyEmpty());
|
if (args[i]->get().GetType() == IR::Type::U128) {
|
||||||
LoadCopyInto(args[i]->get().value, oaknut::XReg{i});
|
ASSERT(fprs[nsrn].IsCompletelyEmpty());
|
||||||
|
LoadCopyInto(args[i]->get().value, oaknut::QReg{nsrn});
|
||||||
|
nsrn++;
|
||||||
|
} else {
|
||||||
|
ASSERT(gprs[ngrn].IsCompletelyEmpty());
|
||||||
|
LoadCopyInto(args[i]->get().value, oaknut::XReg{ngrn});
|
||||||
|
ngrn++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Gaps are assumed to be in general-purpose registers
|
||||||
|
// TODO: should there be a separate list passed for FPRs instead?
|
||||||
|
ngrn++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result) {
|
if (result) {
|
||||||
|
if (result->GetType() == IR::Type::U128) {
|
||||||
|
DefineAsRegister(result, Q0);
|
||||||
|
} else {
|
||||||
DefineAsRegister(result, X0);
|
DefineAsRegister(result, X0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
|
void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
|
||||||
ASSERT(!ValueLocation(inst));
|
ASSERT(!ValueLocation(inst));
|
||||||
|
|
Loading…
Reference in a new issue