IR: Add microinstructions UnsignedSaturation and SignedSaturation
This commit is contained in:
parent
b1df70578f
commit
6a269a6ebd
4 changed files with 93 additions and 0 deletions
|
@ -1348,6 +1348,81 @@ void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits x64 code for the UnsignedSaturation microinstruction.
///
/// Argument 0 is the 32-bit value to saturate; it is compared as a signed
/// quantity. Argument 1 is a U8 immediate N (asserted to be at most 31).
/// The emitted code clamps the value to the range [0, 2^N - 1].
///
/// If a GetOverflowFromOp pseudo-operation is attached to inst, it is defined
/// as 1 when the input was outside that range and 0 otherwise, and the
/// pseudo-operation is erased from the block.
void EmitX64::EmitUnsignedSaturation(IR::Block& block, IR::Inst* inst) {
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value operand = inst->GetArg(0);
    const size_t N = inst->GetArg(1).GetU8();
    ASSERT(N <= 31);

    // Largest value representable in N unsigned bits.
    const u32 upper_bound = (1u << N) - 1;

    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Reg32 value = reg_alloc.UseGpr(operand).cvt32();
    // When nobody consumes the overflow flag, a scratch register still serves as the zero source below.
    Xbyak::Reg32 overflow = overflow_inst
                                ? reg_alloc.DefGpr(overflow_inst).cvt32()
                                : reg_alloc.ScratchGpr().cvt32();

    // Equivalent to: result = clamp(value, 0, upper_bound)
    code->xor_(overflow, overflow);      // overflow = 0 (doubles as the lower bound)
    code->cmp(value, upper_bound);       // flags are reused by both cmovs and the seta below
    code->mov(result, upper_bound);      // default: saturate high (mov leaves flags untouched)
    code->cmovle(result, overflow);      // signed <= upper_bound: provisionally 0 (covers negatives)
    code->cmovbe(result, value);         // unsigned <= upper_bound: value was already in range

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);
    inst->DecrementRemainingUses();

    // Unsigned "above" means the value was negative or exceeded upper_bound.
    code->seta(overflow.cvt8());
}
|
||||||
|
|
||||||
|
/// Emits x64 code for the SignedSaturation microinstruction.
///
/// Argument 0 is the 32-bit value to saturate. Argument 1 is a U8 immediate N
/// (asserted to satisfy 1 <= N <= 32) giving the signed bit width to saturate to.
///
/// For N == 32 no code is emitted: the result aliases the input, and any
/// attached GetOverflowFromOp has its uses replaced with the constant false.
/// Otherwise the value is clamped to the signed N-bit range, and an attached
/// GetOverflowFromOp is set when the input was outside that range.
void EmitX64::EmitSignedSaturation(IR::Block& block, IR::Inst* inst) {
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value operand = inst->GetArg(0);
    const size_t N = inst->GetArg(1).GetU8();
    ASSERT(N >= 1 && N <= 32);

    if (N == 32) {
        // Saturating a 32-bit value to 32 bits is the identity and cannot overflow.
        reg_alloc.RegisterAddDef(inst, operand);
        if (overflow_inst) {
            auto never_overflows = IR::Value(false);
            overflow_inst->ReplaceUsesWith(never_overflows);
        }
        return;
    }

    const u32 half_range = 1u << (N - 1);       // 2^(N-1): magnitude of the most negative value
    const u32 range_mask = (1u << N) - 1;       // 2^N - 1
    const u32 max_value = half_range - 1;       // 2^(N-1) - 1: largest representable value
    // presumably the 32-bit two's-complement encoding of -2^(N-1); depends on Common::SignExtend
    const u32 min_value = Common::SignExtend(N, half_range);

    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Reg32 value = reg_alloc.UseGpr(operand).cvt32();
    Xbyak::Reg32 overflow = overflow_inst
                                ? reg_alloc.DefGpr(overflow_inst).cvt32()
                                : reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 scratch = reg_alloc.ScratchGpr().cvt32();

    // Bias the input by 2^(N-1): an in-range input lands in [0, range_mask],
    // so a single unsigned comparison later detects both kinds of overflow.
    code->lea(overflow, code->ptr[value.cvt64() + half_range]);

    // Pre-select the bound that would apply if the input turns out to be out of range.
    code->cmp(value, max_value);
    code->mov(scratch, max_value);
    code->mov(result, min_value);
    code->cmovg(result, scratch);

    // In-range inputs pass through unchanged.
    code->cmp(overflow, range_mask);
    code->cmovbe(result, value);

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);
    inst->DecrementRemainingUses();

    // NOTE(review): seta writes only the low byte; the upper bits of overflow still
    // hold the biased value from the lea above - confirm consumers read only 8 bits.
    code->seta(overflow.cvt8());
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts the most significant bits from each of the packed bytes, and packs them together.
|
* Extracts the most significant bits from each of the packed bytes, and packs them together.
|
||||||
*
|
*
|
||||||
|
|
|
@ -348,6 +348,20 @@ IREmitter::ResultAndOverflow IREmitter::SignedSaturatedSub(const Value& a, const
|
||||||
return {result, overflow};
|
return {result, overflow};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Appends an UnsignedSaturation microinstruction together with its
/// GetOverflowFromOp pseudo-operation.
///
/// @param a                        Value to saturate.
/// @param bit_size_to_saturate_to  Bit width to saturate to; asserted to be at most 31.
///                                 It is narrowed to a u8 immediate operand.
/// @return The saturated result paired with the overflow value derived from it.
IREmitter::ResultAndOverflow IREmitter::UnsignedSaturation(const Value& a, size_t bit_size_to_saturate_to) {
    ASSERT(bit_size_to_saturate_to <= 31);

    const auto width_imm = Imm8(static_cast<u8>(bit_size_to_saturate_to));
    auto saturated = Inst(Opcode::UnsignedSaturation, {a, width_imm});
    auto did_overflow = Inst(Opcode::GetOverflowFromOp, {saturated});

    return {saturated, did_overflow};
}
|
||||||
|
|
||||||
|
/// Appends a SignedSaturation microinstruction together with its
/// GetOverflowFromOp pseudo-operation.
///
/// @param a                        Value to saturate.
/// @param bit_size_to_saturate_to  Bit width to saturate to; asserted to lie in [1, 32].
///                                 It is narrowed to a u8 immediate operand.
/// @return The saturated result paired with the overflow value derived from it.
IREmitter::ResultAndOverflow IREmitter::SignedSaturation(const Value& a, size_t bit_size_to_saturate_to) {
    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);

    const auto width_imm = Imm8(static_cast<u8>(bit_size_to_saturate_to));
    auto saturated = Inst(Opcode::SignedSaturation, {a, width_imm});
    auto did_overflow = Inst(Opcode::GetOverflowFromOp, {saturated});

    return {saturated, did_overflow};
}
|
||||||
|
|
||||||
IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
|
IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
|
||||||
auto result = Inst(Opcode::PackedAddU8, {a, b});
|
auto result = Inst(Opcode::PackedAddU8, {a, b});
|
||||||
auto ge = Inst(Opcode::GetGEFromOp, {result});
|
auto ge = Inst(Opcode::GetGEFromOp, {result});
|
||||||
|
|
|
@ -138,6 +138,8 @@ public:
|
||||||
|
|
||||||
ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b);
|
ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b);
|
||||||
ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b);
|
ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b);
|
||||||
|
// Emits an UnsignedSaturation op on a plus its overflow pseudo-op; bit_size_to_saturate_to must be <= 31.
ResultAndOverflow UnsignedSaturation(const Value& a, size_t bit_size_to_saturate_to);
|
||||||
|
// Emits a SignedSaturation op on a plus its overflow pseudo-op; bit_size_to_saturate_to must be in [1, 32].
ResultAndOverflow SignedSaturation(const Value& a, size_t bit_size_to_saturate_to);
|
||||||
|
|
||||||
ResultAndGE PackedAddU8(const Value& a, const Value& b);
|
ResultAndGE PackedAddU8(const Value& a, const Value& b);
|
||||||
ResultAndGE PackedAddS8(const Value& a, const Value& b);
|
ResultAndGE PackedAddS8(const Value& a, const Value& b);
|
||||||
|
|
|
@ -79,6 +79,8 @@ OPCODE(NegateHighWord, T::U32, T::U32
|
||||||
// Saturated instructions
|
// Saturated instructions
|
||||||
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
|
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
|
||||||
|
// For the two entries below, the trailing T::U8 operand is the bit width to saturate to (IREmitter passes it as an Imm8).
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
|
||||||
|
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
|
||||||
|
|
||||||
// Packed instructions
|
// Packed instructions
|
||||||
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
||||||
|
|
Loading…
Reference in a new issue