emit_arm64_memory: Use LDAR and STLR instead of barriers

This commit is contained in:
Merry 2023-01-03 16:51:31 +00:00
parent 4bf4d6565e
commit fae1d604e4

View file

@@ -284,33 +284,59 @@ std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::Cod
template<std::size_t bitsize> template<std::size_t bitsize>
CodePtr EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) { CodePtr EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
const auto ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL; const auto index_ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
const auto add_ext = extend32 ? oaknut::AddSubExt::UXTW : oaknut::AddSubExt::LSL;
const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset}; const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
const CodePtr fastmem_location = code.ptr<CodePtr>(); CodePtr fastmem_location = code.ptr<CodePtr>();
if (ordered) {
code.ADD(Xscratch0, Xbase, Roffset, add_ext);
fastmem_location = code.ptr<CodePtr>();
switch (bitsize) { switch (bitsize) {
case 8: case 8:
code.LDRB(oaknut::WReg{value_idx}, Xbase, Roffset, ext); code.LDARB(oaknut::WReg{value_idx}, Xscratch0);
break; break;
case 16: case 16:
code.LDRH(oaknut::WReg{value_idx}, Xbase, Roffset, ext); code.LDARH(oaknut::WReg{value_idx}, Xscratch0);
break; break;
case 32: case 32:
code.LDR(oaknut::WReg{value_idx}, Xbase, Roffset, ext); code.LDAR(oaknut::WReg{value_idx}, Xscratch0);
break; break;
case 64: case 64:
code.LDR(oaknut::XReg{value_idx}, Xbase, Roffset, ext); code.LDAR(oaknut::XReg{value_idx}, Xscratch0);
break; break;
case 128: case 128:
code.LDR(oaknut::QReg{value_idx}, Xbase, Roffset, ext); code.LDR(oaknut::QReg{value_idx}, Xscratch0);
code.DMB(oaknut::BarrierOp::ISH);
break; break;
default: default:
ASSERT_FALSE("Invalid bitsize"); ASSERT_FALSE("Invalid bitsize");
} }
} else {
fastmem_location = code.ptr<CodePtr>();
if (ordered) { switch (bitsize) {
// TODO: Use LDAR case 8:
code.DMB(oaknut::BarrierOp::ISH); code.LDRB(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
break;
case 16:
code.LDRH(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
break;
case 32:
code.LDR(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
break;
case 64:
code.LDR(oaknut::XReg{value_idx}, Xbase, Roffset, index_ext);
break;
case 128:
code.LDR(oaknut::QReg{value_idx}, Xbase, Roffset, index_ext);
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
} }
return fastmem_location; return fastmem_location;
@@ -318,38 +344,60 @@ CodePtr EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg X
template<std::size_t bitsize> template<std::size_t bitsize>
CodePtr EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) { CodePtr EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
const auto ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL; const auto index_ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
const auto add_ext = extend32 ? oaknut::AddSubExt::UXTW : oaknut::AddSubExt::LSL;
const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset}; const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
if (ordered) { CodePtr fastmem_location;
// TODO: Use STLR
code.DMB(oaknut::BarrierOp::ISH); if (ordered) {
} code.ADD(Xscratch0, Xbase, Roffset, add_ext);
fastmem_location = code.ptr<CodePtr>();
const CodePtr fastmem_location = code.ptr<CodePtr>();
switch (bitsize) { switch (bitsize) {
case 8: case 8:
code.STRB(oaknut::WReg{value_idx}, Xbase, Roffset, ext); code.STLRB(oaknut::WReg{value_idx}, Xscratch0);
break; break;
case 16: case 16:
code.STRH(oaknut::WReg{value_idx}, Xbase, Roffset, ext); code.STLRH(oaknut::WReg{value_idx}, Xscratch0);
break; break;
case 32: case 32:
code.STR(oaknut::WReg{value_idx}, Xbase, Roffset, ext); code.STLR(oaknut::WReg{value_idx}, Xscratch0);
break; break;
case 64: case 64:
code.STR(oaknut::XReg{value_idx}, Xbase, Roffset, ext); code.STLR(oaknut::XReg{value_idx}, Xscratch0);
break; break;
case 128: case 128:
code.STR(oaknut::QReg{value_idx}, Xbase, Roffset, ext); code.DMB(oaknut::BarrierOp::ISH);
code.STR(oaknut::QReg{value_idx}, Xscratch0);
code.DMB(oaknut::BarrierOp::ISH);
break; break;
default: default:
ASSERT_FALSE("Invalid bitsize"); ASSERT_FALSE("Invalid bitsize");
} }
} else {
fastmem_location = code.ptr<CodePtr>();
if (ordered) { switch (bitsize) {
// TODO: Use STLR case 8:
code.DMB(oaknut::BarrierOp::ISH); code.STRB(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
break;
case 16:
code.STRH(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
break;
case 32:
code.STR(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
break;
case 64:
code.STR(oaknut::XReg{value_idx}, Xbase, Roffset, index_ext);
break;
case 128:
code.STR(oaknut::QReg{value_idx}, Xbase, Roffset, index_ext);
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
} }
return fastmem_location; return fastmem_location;