emit_x64_packed: PackedAbsDiffSumS8: Fix case when bits above the lower 32 bits are not zero

This commit is contained in:
MerryMage 2020-06-12 15:24:09 +01:00
parent 6cf5c78bfa
commit e953f67201
2 changed files with 53 additions and 1 deletions

View file

@ -655,7 +655,19 @@ void EmitX64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psadbw);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
// TODO: Optimize with zero-extension detection
code.movaps(tmp, code.MConst(xword, 0xFFFFFFFF));
code.pand(xmm_a, tmp);
code.pand(xmm_b, tmp);
code.psadbw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {

View file

@ -425,3 +425,43 @@ TEST_CASE("arm: Test stepping 3", "[arm]") {
REQUIRE(jit.Regs()[15] == 20);
REQUIRE(jit.Cpsr() == 0x000001d0);
}
TEST_CASE("arm: PackedAbsDiffSumS8", "[arm][A32]") {
// This was a randomized test-case that was failing.
// In circumstances there were cases when the upper 32 bits of an argument to psadbw were not zero.
ArmTestEnv test_env;
A32::Jit jit{GetUserConfig(&test_env)};
test_env.code_mem = {
0x87414354, // smlsldhi r4, r1, r4, r3
0xe7886412, // usad8a r8, r2, r4, r6
0xeafffffe, // b +#0
};
jit.Regs() = {
0xea85297c, 0x417ad918, 0x64f8b70b, 0xcca0373e, 0xbc722361, 0xc528c69e, 0xca926de8, 0xd665d210,
0xb5650555, 0x4a24b25b, 0xaed44144, 0xe87230b2, 0x98e391de, 0x126efc0c, 0xe591fd11, 0x00000000,
};
jit.SetCpsr(0xb0000010);
test_env.ticks_left = 3;
jit.Run();
REQUIRE(jit.Regs()[0] == 0xea85297c);
REQUIRE(jit.Regs()[1] == 0x417ad918);
REQUIRE(jit.Regs()[2] == 0x64f8b70b);
REQUIRE(jit.Regs()[3] == 0xcca0373e);
REQUIRE(jit.Regs()[4] == 0xb685ec9f);
REQUIRE(jit.Regs()[5] == 0xc528c69e);
REQUIRE(jit.Regs()[6] == 0xca926de8);
REQUIRE(jit.Regs()[7] == 0xd665d210);
REQUIRE(jit.Regs()[8] == 0xca926f76);
REQUIRE(jit.Regs()[9] == 0x4a24b25b);
REQUIRE(jit.Regs()[10] == 0xaed44144);
REQUIRE(jit.Regs()[11] == 0xe87230b2);
REQUIRE(jit.Regs()[12] == 0x98e391de);
REQUIRE(jit.Regs()[13] == 0x126efc0c);
REQUIRE(jit.Regs()[14] == 0xe591fd11);
REQUIRE(jit.Regs()[15] == 0x00000008);
REQUIRE(jit.Cpsr() == 0xb0000010);
}