diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index b0bc5215..703e5c81 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -1348,6 +1348,106 @@ void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) {
     }
 }
 
+/**
+ * Emits code that clamps argument 0, interpreted as a signed 32-bit integer, to the
+ * unsigned N-bit range [0, 2^N - 1]. N is the U8 immediate held in argument 1 (N <= 31).
+ *
+ * If a GetOverflowFromOp pseudo-operation is attached to inst, its register is defined
+ * here (1 if the input was out of range, 0 otherwise) and the pseudo-operation is then
+ * removed with EraseInstruction.
+ */
+void EmitX64::EmitUnsignedSaturation(IR::Block& block, IR::Inst* inst) {
+    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    IR::Value a = inst->GetArg(0);
+    size_t N = inst->GetArg(1).GetU8();
+    ASSERT(N <= 31);
+
+    u32 saturated_value = (1u << N) - 1;
+
+    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+    Xbyak::Reg32 reg_a = reg_alloc.UseGpr(a).cvt32();
+    Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();
+
+    // Pseudocode: result = clamp(reg_a, 0, saturated_value);
+    // overflow doubles as the zero source for cmovle, so it is cleared before the cmp:
+    // xor rewrites the flags, whereas mov and cmov leave them untouched.
+    code->xor_(overflow, overflow);
+    code->cmp(reg_a, saturated_value);
+    code->mov(result, saturated_value);
+    code->cmovle(result, overflow); // signed reg_a <= max: provisionally 0 (covers negatives)
+    code->cmovbe(result, reg_a);    // unsigned reg_a <= max: in range, pass the input through
+
+    if (overflow_inst) {
+        EraseInstruction(block, overflow_inst);
+        inst->DecrementRemainingUses();
+
+        // Flags are still those of the cmp above; unsigned "above" means out of range.
+        code->seta(overflow.cvt8());
+    }
+}
+
+/**
+ * Emits code that clamps argument 0, interpreted as a signed 32-bit integer, to the signed
+ * N-bit range [-2^(N-1), 2^(N-1) - 1]. N is the U8 immediate held in argument 1 (1 <= N <= 32).
+ *
+ * For N == 32 every input already fits, so nothing is clamped: inst is bound to argument 0
+ * through RegisterAddDef and an attached GetOverflowFromOp has its uses replaced by IR::Value(false).
+ *
+ * Otherwise, if a GetOverflowFromOp pseudo-operation is attached to inst, the low byte of
+ * its register is set to 1 if the input was out of range and to 0 if not, and the
+ * pseudo-operation is then removed with EraseInstruction.
+ */
+void EmitX64::EmitSignedSaturation(IR::Block& block, IR::Inst* inst) {
+    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    IR::Value a = inst->GetArg(0);
+    size_t N = inst->GetArg(1).GetU8();
+    ASSERT(N >= 1 && N <= 32);
+
+    if (N == 32) {
+        reg_alloc.RegisterAddDef(inst, a);
+        if (overflow_inst) {
+            auto no_overflow = IR::Value(false);
+            overflow_inst->ReplaceUsesWith(no_overflow);
+        }
+        return;
+    }
+
+    u32 mask = (1u << N) - 1;
+    u32 positive_saturated_value = (1u << (N - 1)) - 1;
+    u32 negative_saturated_value = 1u << (N - 1);
+    u32 sext_negative_saturated_value = Common::SignExtend(N, negative_saturated_value);
+
+    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+    Xbyak::Reg32 reg_a = reg_alloc.UseGpr(a).cvt32();
+    Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32();
+
+    // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
+    // Biasing by 2^(N-1) turns the two-sided signed range check into one unsigned compare against mask.
+    code->lea(overflow, code->ptr[reg_a.cvt64() + negative_saturated_value]);
+
+    // Put the appropriate saturated value in result
+    code->cmp(reg_a, positive_saturated_value);
+    code->mov(tmp, positive_saturated_value);
+    code->mov(result, sext_negative_saturated_value);
+    code->cmovg(result, tmp); // signed reg_a > max ? max : sign-extended min
+
+    // Do the saturation
+    code->cmp(overflow, mask);
+    code->cmovbe(result, reg_a); // biased value within [0, mask]: in range, pass the input through
+
+    if (overflow_inst) {
+        EraseInstruction(block, overflow_inst);
+        inst->DecrementRemainingUses();
+
+        // Flags are those of cmp(overflow, mask). seta writes only the low byte, so the upper
+        // bits of overflow still hold the biased value produced by the lea.
+        code->seta(overflow.cvt8());
+    }
+}
+
 /**
  * Extracts the most significant bits from each of the packed bytes, and packs them together.
  *
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 5db49c7c..a58ec842 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -348,6 +348,24 @@ IREmitter::ResultAndOverflow IREmitter::SignedSaturatedSub(const Value& a, const
     return {result, overflow};
 }
 
+// Builds an UnsignedSaturation instruction over a, with the target width (at most 31) passed
+// as a U8 immediate, and a GetOverflowFromOp that takes that instruction as its operand.
+IREmitter::ResultAndOverflow IREmitter::UnsignedSaturation(const Value& a, size_t bit_size_to_saturate_to) {
+    ASSERT(bit_size_to_saturate_to <= 31);
+    auto result = Inst(Opcode::UnsignedSaturation, {a, Imm8(static_cast<u8>(bit_size_to_saturate_to))});
+    auto overflow = Inst(Opcode::GetOverflowFromOp, {result});
+    return {result, overflow};
+}
+
+// Builds a SignedSaturation instruction over a, with the target width (1 to 32) passed
+// as a U8 immediate, and a GetOverflowFromOp that takes that instruction as its operand.
+IREmitter::ResultAndOverflow IREmitter::SignedSaturation(const Value& a, size_t bit_size_to_saturate_to) {
+    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);
+    auto result = Inst(Opcode::SignedSaturation, {a, Imm8(static_cast<u8>(bit_size_to_saturate_to))});
+    auto overflow = Inst(Opcode::GetOverflowFromOp, {result});
+    return {result, overflow};
+}
+
 IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
     auto result = Inst(Opcode::PackedAddU8, {a, b});
     auto ge = Inst(Opcode::GetGEFromOp, {result});
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 015002f0..0a3d0327 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -138,6 +138,8 @@ public:
 
     ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b);
     ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b);
+    ResultAndOverflow UnsignedSaturation(const Value& a, size_t bit_size_to_saturate_to);
+    ResultAndOverflow SignedSaturation(const Value& a, size_t bit_size_to_saturate_to);
 
     ResultAndGE PackedAddU8(const Value& a, const Value& b);
     ResultAndGE PackedAddS8(const Value& a, const Value& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 79d0c247..59c79e94 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -79,6 +79,8 @@ OPCODE(NegateHighWord, T::U32, T::U32
 // Saturated instructions
 OPCODE(SignedSaturatedAdd,      T::U32,         T::U32,         T::U32                          )
 OPCODE(SignedSaturatedSub,      T::U32,         T::U32,         T::U32                          )
+OPCODE(UnsignedSaturation,      T::U32,         T::U32,         T::U8                           )
+OPCODE(SignedSaturation,        T::U32,         T::U32,         T::U8                           )
 
 // Packed instructions
 OPCODE(PackedAddU8,             T::U32,         T::U32,         T::U32                          )