1
0
Fork 0
forked from suyu/suyu

shader: Implement I2F

This commit is contained in:
ReinUsesLisp 2021-03-20 05:04:12 -03:00 committed by ameerj
parent c97d03efb9
commit f91859efd2
17 changed files with 429 additions and 70 deletions

View file

@ -84,6 +84,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/integer_add_three_input.cpp frontend/maxwell/translate/impl/integer_add_three_input.cpp
frontend/maxwell/translate/impl/integer_compare.cpp frontend/maxwell/translate/impl/integer_compare.cpp
frontend/maxwell/translate/impl/integer_compare_and_set.cpp frontend/maxwell/translate/impl/integer_compare_and_set.cpp
frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
frontend/maxwell/translate/impl/integer_funnel_shift.cpp frontend/maxwell/translate/impl/integer_funnel_shift.cpp
frontend/maxwell/translate/impl/integer_minimum_maximum.cpp frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
frontend/maxwell/translate/impl/integer_popcount.cpp frontend/maxwell/translate/impl/integer_popcount.cpp

View file

@ -89,6 +89,8 @@ Id EmitContext::Def(const IR::Value& value) {
return value.U1() ? true_value : false_value; return value.U1() ? true_value : false_value;
case IR::Type::U32: case IR::Type::U32:
return Constant(U32[1], value.U32()); return Constant(U32[1], value.U32());
case IR::Type::U64:
return Constant(U64, value.U64());
case IR::Type::F32: case IR::Type::F32:
return Constant(F32[1], value.F32()); return Constant(F32[1], value.F32());
case IR::Type::F64: case IR::Type::F64:

View file

@ -243,6 +243,7 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b);
Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg32(EmitContext& ctx, Id value);
Id EmitINeg64(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value);
Id EmitIAbs32(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value);
Id EmitIAbs64(EmitContext& ctx, Id value);
Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
@ -302,16 +303,28 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value);
Id EmitConvertF32F16(EmitContext& ctx, Id value); Id EmitConvertF32F16(EmitContext& ctx, Id value);
Id EmitConvertF32F64(EmitContext& ctx, Id value); Id EmitConvertF32F64(EmitContext& ctx, Id value);
Id EmitConvertF64F32(EmitContext& ctx, Id value); Id EmitConvertF64F32(EmitContext& ctx, Id value);
Id EmitConvertF16S8(EmitContext& ctx, Id value);
Id EmitConvertF16S16(EmitContext& ctx, Id value);
Id EmitConvertF16S32(EmitContext& ctx, Id value); Id EmitConvertF16S32(EmitContext& ctx, Id value);
Id EmitConvertF16S64(EmitContext& ctx, Id value); Id EmitConvertF16S64(EmitContext& ctx, Id value);
Id EmitConvertF16U8(EmitContext& ctx, Id value);
Id EmitConvertF16U16(EmitContext& ctx, Id value);
Id EmitConvertF16U32(EmitContext& ctx, Id value); Id EmitConvertF16U32(EmitContext& ctx, Id value);
Id EmitConvertF16U64(EmitContext& ctx, Id value); Id EmitConvertF16U64(EmitContext& ctx, Id value);
Id EmitConvertF32S8(EmitContext& ctx, Id value);
Id EmitConvertF32S16(EmitContext& ctx, Id value);
Id EmitConvertF32S32(EmitContext& ctx, Id value); Id EmitConvertF32S32(EmitContext& ctx, Id value);
Id EmitConvertF32S64(EmitContext& ctx, Id value); Id EmitConvertF32S64(EmitContext& ctx, Id value);
Id EmitConvertF32U8(EmitContext& ctx, Id value);
Id EmitConvertF32U16(EmitContext& ctx, Id value);
Id EmitConvertF32U32(EmitContext& ctx, Id value); Id EmitConvertF32U32(EmitContext& ctx, Id value);
Id EmitConvertF32U64(EmitContext& ctx, Id value); Id EmitConvertF32U64(EmitContext& ctx, Id value);
Id EmitConvertF64S8(EmitContext& ctx, Id value);
Id EmitConvertF64S16(EmitContext& ctx, Id value);
Id EmitConvertF64S32(EmitContext& ctx, Id value); Id EmitConvertF64S32(EmitContext& ctx, Id value);
Id EmitConvertF64S64(EmitContext& ctx, Id value); Id EmitConvertF64S64(EmitContext& ctx, Id value);
Id EmitConvertF64U8(EmitContext& ctx, Id value);
Id EmitConvertF64U16(EmitContext& ctx, Id value);
Id EmitConvertF64U32(EmitContext& ctx, Id value); Id EmitConvertF64U32(EmitContext& ctx, Id value);
Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitBindlessImageSampleImplicitLod(EmitContext&); Id EmitBindlessImageSampleImplicitLod(EmitContext&);

View file

@ -102,6 +102,14 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) {
return ctx.OpFConvert(ctx.F64[1], value); return ctx.OpFConvert(ctx.F64[1], value);
} }
// Emits a signed integer -> F16 conversion. The operand is passed through as-is.
Id EmitConvertF16S8(EmitContext& ctx, Id value) {
    const Id result_type = ctx.F16[1];
    return ctx.OpConvertSToF(result_type, value);
}
// Emits a signed integer -> F16 conversion. The operand is passed through as-is.
Id EmitConvertF16S16(EmitContext& ctx, Id value) {
    const Id result_type = ctx.F16[1];
    return ctx.OpConvertSToF(result_type, value);
}
Id EmitConvertF16S32(EmitContext& ctx, Id value) { Id EmitConvertF16S32(EmitContext& ctx, Id value) {
return ctx.OpConvertSToF(ctx.F16[1], value); return ctx.OpConvertSToF(ctx.F16[1], value);
} }
@ -110,6 +118,14 @@ Id EmitConvertF16S64(EmitContext& ctx, Id value) {
return ctx.OpConvertSToF(ctx.F16[1], value); return ctx.OpConvertSToF(ctx.F16[1], value);
} }
// Emits an unsigned integer -> F16 conversion. The operand is passed through as-is.
Id EmitConvertF16U8(EmitContext& ctx, Id value) {
    const Id result_type = ctx.F16[1];
    return ctx.OpConvertUToF(result_type, value);
}
// Emits an unsigned integer -> F16 conversion. The operand is passed through as-is.
Id EmitConvertF16U16(EmitContext& ctx, Id value) {
    const Id result_type = ctx.F16[1];
    return ctx.OpConvertUToF(result_type, value);
}
Id EmitConvertF16U32(EmitContext& ctx, Id value) { Id EmitConvertF16U32(EmitContext& ctx, Id value) {
return ctx.OpConvertUToF(ctx.F16[1], value); return ctx.OpConvertUToF(ctx.F16[1], value);
} }
@ -118,6 +134,14 @@ Id EmitConvertF16U64(EmitContext& ctx, Id value) {
return ctx.OpConvertUToF(ctx.F16[1], value); return ctx.OpConvertUToF(ctx.F16[1], value);
} }
// Emits an 8-bit signed integer -> F32 conversion.
Id EmitConvertF32S8(EmitContext& ctx, Id value) {
    // Convert the operand to the 8-bit integer type before the int-to-float conversion.
    const Id narrowed = ctx.OpUConvert(ctx.U8, value);
    return ctx.OpConvertSToF(ctx.F32[1], narrowed);
}
// Emits a 16-bit signed integer -> F32 conversion.
Id EmitConvertF32S16(EmitContext& ctx, Id value) {
    // Convert the operand to the 16-bit integer type before the int-to-float conversion.
    const Id narrowed = ctx.OpUConvert(ctx.U16, value);
    return ctx.OpConvertSToF(ctx.F32[1], narrowed);
}
Id EmitConvertF32S32(EmitContext& ctx, Id value) { Id EmitConvertF32S32(EmitContext& ctx, Id value) {
return ctx.OpConvertSToF(ctx.F32[1], value); return ctx.OpConvertSToF(ctx.F32[1], value);
} }
@ -126,6 +150,14 @@ Id EmitConvertF32S64(EmitContext& ctx, Id value) {
return ctx.OpConvertSToF(ctx.F32[1], value); return ctx.OpConvertSToF(ctx.F32[1], value);
} }
// Emits an 8-bit unsigned integer -> F32 conversion.
Id EmitConvertF32U8(EmitContext& ctx, Id value) {
    // Convert the operand to the 8-bit integer type before the int-to-float conversion.
    const Id narrowed = ctx.OpUConvert(ctx.U8, value);
    return ctx.OpConvertUToF(ctx.F32[1], narrowed);
}
// Emits a 16-bit unsigned integer -> F32 conversion.
Id EmitConvertF32U16(EmitContext& ctx, Id value) {
    // Convert the operand to the 16-bit integer type before the int-to-float conversion.
    const Id narrowed = ctx.OpUConvert(ctx.U16, value);
    return ctx.OpConvertUToF(ctx.F32[1], narrowed);
}
Id EmitConvertF32U32(EmitContext& ctx, Id value) { Id EmitConvertF32U32(EmitContext& ctx, Id value) {
return ctx.OpConvertUToF(ctx.F32[1], value); return ctx.OpConvertUToF(ctx.F32[1], value);
} }
@ -134,6 +166,14 @@ Id EmitConvertF32U64(EmitContext& ctx, Id value) {
return ctx.OpConvertUToF(ctx.F32[1], value); return ctx.OpConvertUToF(ctx.F32[1], value);
} }
// Emits an 8-bit signed integer -> F64 conversion.
Id EmitConvertF64S8(EmitContext& ctx, Id value) {
    // Convert the operand to the 8-bit integer type before the int-to-float conversion.
    const Id narrowed = ctx.OpUConvert(ctx.U8, value);
    return ctx.OpConvertSToF(ctx.F64[1], narrowed);
}
// Emits a 16-bit signed integer -> F64 conversion.
Id EmitConvertF64S16(EmitContext& ctx, Id value) {
    // Convert the operand to the 16-bit integer type before the int-to-float conversion.
    const Id narrowed = ctx.OpUConvert(ctx.U16, value);
    return ctx.OpConvertSToF(ctx.F64[1], narrowed);
}
Id EmitConvertF64S32(EmitContext& ctx, Id value) { Id EmitConvertF64S32(EmitContext& ctx, Id value) {
return ctx.OpConvertSToF(ctx.F64[1], value); return ctx.OpConvertSToF(ctx.F64[1], value);
} }
@ -142,6 +182,14 @@ Id EmitConvertF64S64(EmitContext& ctx, Id value) {
return ctx.OpConvertSToF(ctx.F64[1], value); return ctx.OpConvertSToF(ctx.F64[1], value);
} }
// Emits an 8-bit unsigned integer -> F64 conversion.
Id EmitConvertF64U8(EmitContext& ctx, Id value) {
    // Convert the operand to the 8-bit integer type before the int-to-float conversion.
    const Id narrowed = ctx.OpUConvert(ctx.U8, value);
    return ctx.OpConvertUToF(ctx.F64[1], narrowed);
}
// Emits a 16-bit unsigned integer -> F64 conversion.
Id EmitConvertF64U16(EmitContext& ctx, Id value) {
    // Convert the operand to the 16-bit integer type before the int-to-float conversion.
    const Id narrowed = ctx.OpUConvert(ctx.U16, value);
    return ctx.OpConvertUToF(ctx.F64[1], narrowed);
}
Id EmitConvertF64U32(EmitContext& ctx, Id value) { Id EmitConvertF64U32(EmitContext& ctx, Id value) {
return ctx.OpConvertUToF(ctx.F64[1], value); return ctx.OpConvertUToF(ctx.F64[1], value);
} }

View file

@ -70,6 +70,10 @@ Id EmitIAbs32(EmitContext& ctx, Id value) {
return ctx.OpSAbs(ctx.U32[1], value); return ctx.OpSAbs(ctx.U32[1], value);
} }
// Emits the signed absolute value of a 64-bit integer operand.
Id EmitIAbs64(EmitContext& ctx, Id value) {
    const Id result_type = ctx.U64;
    return ctx.OpSAbs(result_type, value);
}
Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift);
} }

View file

@ -53,6 +53,10 @@ U64 IREmitter::Imm64(u64 value) const {
return U64{Value{value}}; return U64{Value{value}};
} }
// Builds a 64-bit immediate from a signed value; the bits are stored as an unsigned 64-bit value.
U64 IREmitter::Imm64(s64 value) const {
    const u64 bits{static_cast<u64>(value)};
    return U64{Value{bits}};
}
F64 IREmitter::Imm64(f64 value) const { F64 IREmitter::Imm64(f64 value) const {
return F64{Value{value}}; return F64{Value{value}};
} }
@ -363,7 +367,7 @@ U1 IREmitter::GetSparseFromOp(const Value& op) {
} }
F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
if (a.Type() != a.Type()) { if (a.Type() != b.Type()) {
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
} }
switch (a.Type()) { switch (a.Type()) {
@ -974,8 +978,15 @@ U32U64 IREmitter::INeg(const U32U64& value) {
} }
} }
U32 IREmitter::IAbs(const U32& value) { U32U64 IREmitter::IAbs(const U32U64& value) {
return Inst<U32>(Opcode::IAbs32, value); switch (value.Type()) {
case Type::U32:
return Inst<U32>(Opcode::IAbs32, value);
case Type::U64:
return Inst<U64>(Opcode::IAbs64, value);
default:
ThrowInvalidType(value.Type());
}
} }
U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
@ -1074,8 +1085,25 @@ U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
} }
U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
return Inst<U1>(Opcode::IEqual, lhs, rhs); if (lhs.Type() != rhs.Type()) {
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::U32:
return Inst<U1>(Opcode::IEqual, lhs, rhs);
case Type::U64: {
// Manually compare the unpacked values
const Value lhs_vector{UnpackUint2x32(lhs)};
const Value rhs_vector{UnpackUint2x32(rhs)};
return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)},
IR::U32{CompositeExtract(rhs_vector, 0)}),
IEqual(IR::U32{CompositeExtract(lhs_vector, 1)},
IR::U32{CompositeExtract(rhs_vector, 1)}));
}
default:
ThrowInvalidType(lhs.Type());
}
} }
U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
@ -1198,79 +1226,96 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
} }
} }
F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) {
switch (bitsize) { switch (dest_bitsize) {
case 16: case 16:
switch (value.Type()) { switch (src_bitsize) {
case Type::U32: case 8:
return Inst<F16>(Opcode::ConvertF16S8, value);
case 16:
return Inst<F16>(Opcode::ConvertF16S16, value);
case 32:
return Inst<F16>(Opcode::ConvertF16S32, value); return Inst<F16>(Opcode::ConvertF16S32, value);
case Type::U64: case 64:
return Inst<F16>(Opcode::ConvertF16S64, value); return Inst<F16>(Opcode::ConvertF16S64, value);
default:
ThrowInvalidType(value.Type());
} }
break;
case 32: case 32:
switch (value.Type()) { switch (src_bitsize) {
case Type::U32: case 8:
return Inst<F32>(Opcode::ConvertF32S8, value);
case 16:
return Inst<F32>(Opcode::ConvertF32S16, value);
case 32:
return Inst<F32>(Opcode::ConvertF32S32, value); return Inst<F32>(Opcode::ConvertF32S32, value);
case Type::U64: case 64:
return Inst<F32>(Opcode::ConvertF32S64, value); return Inst<F32>(Opcode::ConvertF32S64, value);
default:
ThrowInvalidType(value.Type());
} }
break;
case 64: case 64:
switch (value.Type()) { switch (src_bitsize) {
case Type::U32: case 8:
return Inst<F16>(Opcode::ConvertF64S32, value); return Inst<F64>(Opcode::ConvertF64S8, value);
case Type::U64: case 16:
return Inst<F16>(Opcode::ConvertF64S64, value); return Inst<F64>(Opcode::ConvertF64S16, value);
default: case 32:
ThrowInvalidType(value.Type()); return Inst<F64>(Opcode::ConvertF64S32, value);
case 64:
return Inst<F64>(Opcode::ConvertF64S64, value);
} }
default: break;
throw InvalidArgument("Invalid destination bitsize {}", bitsize);
} }
throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
} }
F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) {
switch (bitsize) { switch (dest_bitsize) {
case 16: case 16:
switch (value.Type()) { switch (src_bitsize) {
case Type::U32: case 8:
return Inst<F16>(Opcode::ConvertF16U8, value);
case 16:
return Inst<F16>(Opcode::ConvertF16U16, value);
case 32:
return Inst<F16>(Opcode::ConvertF16U32, value); return Inst<F16>(Opcode::ConvertF16U32, value);
case Type::U64: case 64:
return Inst<F16>(Opcode::ConvertF16U64, value); return Inst<F16>(Opcode::ConvertF16U64, value);
default:
ThrowInvalidType(value.Type());
} }
break;
case 32: case 32:
switch (value.Type()) { switch (src_bitsize) {
case Type::U32: case 8:
return Inst<F32>(Opcode::ConvertF32U8, value);
case 16:
return Inst<F32>(Opcode::ConvertF32U16, value);
case 32:
return Inst<F32>(Opcode::ConvertF32U32, value); return Inst<F32>(Opcode::ConvertF32U32, value);
case Type::U64: case 64:
return Inst<F32>(Opcode::ConvertF32U64, value); return Inst<F32>(Opcode::ConvertF32U64, value);
default:
ThrowInvalidType(value.Type());
} }
break;
case 64: case 64:
switch (value.Type()) { switch (src_bitsize) {
case Type::U32: case 8:
return Inst<F16>(Opcode::ConvertF64U32, value); return Inst<F64>(Opcode::ConvertF64U8, value);
case Type::U64: case 16:
return Inst<F16>(Opcode::ConvertF64U64, value); return Inst<F64>(Opcode::ConvertF64U16, value);
default: case 32:
ThrowInvalidType(value.Type()); return Inst<F64>(Opcode::ConvertF64U32, value);
case 64:
return Inst<F64>(Opcode::ConvertF64U64, value);
} }
default: break;
throw InvalidArgument("Invalid destination bitsize {}", bitsize);
} }
throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
} }
F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
const Value& value) {
if (is_signed) { if (is_signed) {
return ConvertSToF(bitsize, value); return ConvertSToF(dest_bitsize, src_bitsize, value);
} else { } else {
return ConvertUToF(bitsize, value); return ConvertUToF(dest_bitsize, src_bitsize, value);
} }
} }

View file

@ -29,6 +29,7 @@ public:
[[nodiscard]] U32 Imm32(s32 value) const; [[nodiscard]] U32 Imm32(s32 value) const;
[[nodiscard]] F32 Imm32(f32 value) const; [[nodiscard]] F32 Imm32(f32 value) const;
[[nodiscard]] U64 Imm64(u64 value) const; [[nodiscard]] U64 Imm64(u64 value) const;
[[nodiscard]] U64 Imm64(s64 value) const;
[[nodiscard]] F64 Imm64(f64 value) const; [[nodiscard]] F64 Imm64(f64 value) const;
void Branch(Block* label); void Branch(Block* label);
@ -170,7 +171,7 @@ public:
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
[[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b);
[[nodiscard]] U32U64 INeg(const U32U64& value); [[nodiscard]] U32U64 INeg(const U32U64& value);
[[nodiscard]] U32 IAbs(const U32& value); [[nodiscard]] U32U64 IAbs(const U32U64& value);
[[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
[[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
[[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
@ -193,7 +194,7 @@ public:
[[nodiscard]] U32 UMax(const U32& a, const U32& b); [[nodiscard]] U32 UMax(const U32& a, const U32& b);
[[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
[[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
@ -207,9 +208,12 @@ public:
[[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
[[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
[[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
[[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize,
[[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); const Value& value);
[[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize,
const Value& value);
[[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
const Value& value);
[[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);

View file

@ -247,6 +247,7 @@ OPCODE(IMul32, U32, U32,
OPCODE(INeg32, U32, U32, ) OPCODE(INeg32, U32, U32, )
OPCODE(INeg64, U64, U64, ) OPCODE(INeg64, U64, U64, )
OPCODE(IAbs32, U32, U32, ) OPCODE(IAbs32, U32, U32, )
OPCODE(IAbs64, U64, U64, )
OPCODE(ShiftLeftLogical32, U32, U32, U32, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, )
OPCODE(ShiftLeftLogical64, U64, U64, U32, ) OPCODE(ShiftLeftLogical64, U64, U64, U32, )
OPCODE(ShiftRightLogical32, U32, U32, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, )
@ -311,16 +312,28 @@ OPCODE(ConvertF16F32, F16, F32,
OPCODE(ConvertF32F16, F32, F16, ) OPCODE(ConvertF32F16, F32, F16, )
OPCODE(ConvertF32F64, F32, F64, ) OPCODE(ConvertF32F64, F32, F64, )
OPCODE(ConvertF64F32, F64, F32, ) OPCODE(ConvertF64F32, F64, F32, )
OPCODE(ConvertF16S8, F16, U32, )
OPCODE(ConvertF16S16, F16, U32, )
OPCODE(ConvertF16S32, F16, U32, ) OPCODE(ConvertF16S32, F16, U32, )
OPCODE(ConvertF16S64, F16, U64, ) OPCODE(ConvertF16S64, F16, U64, )
OPCODE(ConvertF16U8, F16, U32, )
OPCODE(ConvertF16U16, F16, U32, )
OPCODE(ConvertF16U32, F16, U32, ) OPCODE(ConvertF16U32, F16, U32, )
OPCODE(ConvertF16U64, F16, U64, ) OPCODE(ConvertF16U64, F16, U64, )
OPCODE(ConvertF32S8, F32, U32, )
OPCODE(ConvertF32S16, F32, U32, )
OPCODE(ConvertF32S32, F32, U32, ) OPCODE(ConvertF32S32, F32, U32, )
OPCODE(ConvertF32S64, F32, U64, ) OPCODE(ConvertF32S64, F32, U64, )
OPCODE(ConvertF32U8, F32, U32, )
OPCODE(ConvertF32U16, F32, U32, )
OPCODE(ConvertF32U32, F32, U32, ) OPCODE(ConvertF32U32, F32, U32, )
OPCODE(ConvertF32U64, F32, U64, ) OPCODE(ConvertF32U64, F32, U64, )
OPCODE(ConvertF64S8, F64, U32, )
OPCODE(ConvertF64S16, F64, U32, )
OPCODE(ConvertF64S32, F64, U32, ) OPCODE(ConvertF64S32, F64, U32, )
OPCODE(ConvertF64S64, F64, U64, ) OPCODE(ConvertF64S64, F64, U64, )
OPCODE(ConvertF64U8, F64, U32, )
OPCODE(ConvertF64U16, F64, U32, )
OPCODE(ConvertF64U32, F64, U32, ) OPCODE(ConvertF64U32, F64, U32, )
OPCODE(ConvertF64U64, F64, U64, ) OPCODE(ConvertF64U64, F64, U64, )

View file

@ -121,6 +121,22 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
} }
// Reads a 64-bit integer operand from a constant buffer as two consecutive 32-bit words
// (at the decoded offset and at offset + 4) and packs them into a single U64.
// Throws NotImplementedException when bit 20 of the instruction (the "unaligned" flag) is set.
IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const fields{insn};
    const bool is_unaligned{fields.unaligned != 0};
    if (is_unaligned) {
        throw NotImplementedException("Unaligned packed constant buffer read");
    }
    const auto [binding, low_offset]{CbufAddr(insn)};
    // The second word lives four bytes after the first one
    const IR::U32 high_offset{ir.Imm32(low_offset.U32() + 4)};
    const IR::U32 low_word{ir.GetCbuf(binding, low_offset)};
    const IR::U32 high_word{ir.GetCbuf(binding, high_offset)};
    const IR::Value words{ir.CompositeConstruct(low_word, high_word)};
    return ir.PackUint2x32(words);
}
IR::U32 TranslatorVisitor::GetImm20(u64 insn) { IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
union { union {
u64 raw; u64 raw;
@ -158,6 +174,11 @@ IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
return ir.Imm64(Common::BitCast<f64>(value | sign_bit)); return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
} }
// Builds a 64-bit immediate whose upper 32 bits hold the decoded 20-bit immediate
// and whose lower 32 bits are zero.
IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
    const u64 imm{GetImm20(insn).U32()};
    return ir.Imm64(imm << 32);
}
IR::U32 TranslatorVisitor::GetImm32(u64 insn) { IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
union { union {
u64 raw; u64 raw;

View file

@ -356,10 +356,12 @@ public:
[[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn);
[[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
[[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
[[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
[[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn);
[[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
[[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
[[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
[[nodiscard]] IR::U32 GetImm32(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn);
[[nodiscard]] IR::F32 GetFloatImm32(u64 insn); [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);

View file

@ -0,0 +1,173 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <limits>

#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Destination floating-point format as stored in the instruction's float_format field.
// The value 0 has no enumerator; BitSize() and I2F() throw for anything not listed here.
enum class FloatFormat : u64 {
F16 = 1,
F32 = 2,
F64 = 3,
};
// Source integer width as stored in the instruction's int_format field.
// Signedness is not part of this enum; it comes from the separate is_signed bit.
enum class IntFormat : u64 {
U8 = 0,
U16 = 1,
U32 = 2,
U64 = 3,
};
// Bit-level view of an I2F instruction word.
// BitField arguments are read here as <first bit, width, type> — TODO confirm against bit_field.h.
union Encoding {
// Whole instruction word
u64 raw;
// Destination register
BitField<0, 8, IR::Reg> dest_reg;
// Destination floating-point format
BitField<8, 2, FloatFormat> float_format;
// Source integer width
BitField<10, 2, IntFormat> int_format;
// Non-zero when the source integer is treated as signed
BitField<13, 1, u64> is_signed;
// Rounding mode; decoded here but not read by the I2F code visible in this file
BitField<39, 2, FpRounding> fp_rounding;
// Byte selector for 8-bit and 16-bit sources (multiplied by 8 to form the bit offset)
BitField<41, 2, u64> selector;
// I2F throws NotImplementedException("CC") when this is set
BitField<47, 1, u64> cc;
// Negate modifier
BitField<45, 1, u64> neg;
// Absolute value modifier
BitField<49, 1, u64> abs;
};
// Returns true when the instruction's source integer format is 64-bit.
bool Is64(u64 insn) {
    const Encoding encoding{insn};
    return encoding.int_format == IntFormat::U64;
}
// Maps a floating-point format to its width in bits.
// Throws NotImplementedException for any value that is not F16, F32 or F64.
int BitSize(FloatFormat format) {
    if (format == FloatFormat::F16) {
        return 16;
    }
    if (format == FloatFormat::F32) {
        return 32;
    }
    if (format == FloatFormat::F64) {
        return 64;
    }
    throw NotImplementedException("Invalid float format {}", format);
}
// Absolute value for a `bitsize`-wide integer held in a 32-bit value.
// Computes (value + mask) ^ mask with mask = value >> (bitsize - 1) (arithmetic shift), and
// returns the input unchanged when it equals -(2^(bitsize - 1)).
// NOTE(review): the mask trick presumably expects a sign-extended input - confirm for
// unsigned sources.
IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
    const int sign_shift{bitsize - 1};
    const IR::U32 lowest{v.ir.Imm32(-(1 << sign_shift))};
    const IR::U32 sign_mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(sign_shift))};
    const IR::U32 biased{v.ir.IAdd(value, sign_mask)};
    const IR::U32 magnitude{v.ir.BitwiseXor(biased, sign_mask)};
    const IR::U1 is_lowest{v.ir.IEqual(value, lowest)};
    return IR::U32{v.ir.Select(is_lowest, value, magnitude)};
}
void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
const Encoding i2f{insn};
if (i2f.cc != 0) {
throw NotImplementedException("CC");
}
const bool is_signed{i2f.is_signed != 0};
int src_bitsize{};
switch (i2f.int_format) {
case IntFormat::U8:
src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
v.ir.Imm32(8), is_signed);
if (i2f.abs != 0) {
src = SmallAbs(v, src, 8);
}
src_bitsize = 8;
break;
case IntFormat::U16:
if (i2f.selector == 1 || i2f.selector == 3) {
throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
}
src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
v.ir.Imm32(16), is_signed);
if (i2f.abs != 0) {
src = SmallAbs(v, src, 16);
}
src_bitsize = 16;
break;
case IntFormat::U32:
case IntFormat::U64:
if (i2f.selector != 0) {
throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
}
if (i2f.abs != 0 && is_signed) {
src = v.ir.IAbs(src);
}
src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
break;
}
const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
const int dst_bitsize{BitSize(i2f.float_format)};
IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)};
if (i2f.neg != 0) {
if (i2f.abs != 0 || !is_signed) {
// We know the value is positive
value = v.ir.FPNeg(value);
} else {
// Only negate if the input isn't the lowest value
IR::U1 is_least;
if (src_bitsize == 64) {
is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
} else {
const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
is_least = v.ir.IEqual(src, least_value);
}
value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
}
}
switch (i2f.float_format) {
case FloatFormat::F16: {
const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
break;
}
case FloatFormat::F32:
v.F(i2f.dest_reg, value);
break;
case FloatFormat::F64: {
if (!IR::IsAligned(i2f.dest_reg, 2)) {
throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
}
const IR::Value vector{v.ir.UnpackDouble2x32(value)};
for (int i = 0; i < 2; ++i) {
v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)});
}
break;
}
default:
throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
}
}
} // Anonymous namespace
// I2F with a register source. 64-bit sources are read from the register pair
// (reg, reg + 1) and packed into one value; other widths read a single register.
void TranslatorVisitor::I2F_reg(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetReg20(insn));
        return;
    }
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> reg;
    } const operand{insn};
    const IR::Value pair{
        ir.CompositeConstruct(ir.GetReg(operand.reg), ir.GetReg(operand.reg + 1))};
    I2F(*this, insn, ir.PackUint2x32(pair));
}
// I2F with a constant buffer source; 64-bit sources use the packed two-word read.
void TranslatorVisitor::I2F_cbuf(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetCbuf(insn));
        return;
    }
    I2F(*this, insn, GetPackedCbuf(insn));
}
// I2F with an immediate source; 64-bit sources use the packed immediate form.
void TranslatorVisitor::I2F_imm(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetImm20(insn));
        return;
    }
    I2F(*this, insn, GetPackedImm20(insn));
}
} // namespace Shader::Maxwell

View file

@ -241,18 +241,6 @@ void TranslatorVisitor::HSETP2_imm(u64) {
ThrowNotImplemented(Opcode::HSETP2_imm); ThrowNotImplemented(Opcode::HSETP2_imm);
} }
void TranslatorVisitor::I2F_reg(u64) {
ThrowNotImplemented(Opcode::I2F_reg);
}
void TranslatorVisitor::I2F_cbuf(u64) {
ThrowNotImplemented(Opcode::I2F_cbuf);
}
void TranslatorVisitor::I2F_imm(u64) {
ThrowNotImplemented(Opcode::I2F_imm);
}
void TranslatorVisitor::IDE(u64) { void TranslatorVisitor::IDE(u64) {
ThrowNotImplemented(Opcode::IDE); ThrowNotImplemented(Opcode::IDE);
} }

View file

@ -56,7 +56,7 @@ Shader::TextureType GetType(TextureType type, bool dc) {
} }
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
switch (type) { switch (type) {
case TextureType::_1D: case TextureType::_1D:
return v.F(reg); return v.F(reg);

View file

@ -65,7 +65,7 @@ IR::Value Composite(TranslatorVisitor& v, Args... regs) {
} }
IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
} }
IR::Value Sample(TranslatorVisitor& v, u64 insn) { IR::Value Sample(TranslatorVisitor& v, u64 insn) {

View file

@ -79,6 +79,14 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU16F16:
case IR::Opcode::ConvertU32F16: case IR::Opcode::ConvertU32F16:
case IR::Opcode::ConvertU64F16: case IR::Opcode::ConvertU64F16:
case IR::Opcode::ConvertF16S8:
case IR::Opcode::ConvertF16S16:
case IR::Opcode::ConvertF16S32:
case IR::Opcode::ConvertF16S64:
case IR::Opcode::ConvertF16U8:
case IR::Opcode::ConvertF16U16:
case IR::Opcode::ConvertF16U32:
case IR::Opcode::ConvertF16U64:
case IR::Opcode::FPAbs16: case IR::Opcode::FPAbs16:
case IR::Opcode::FPAdd16: case IR::Opcode::FPAdd16:
case IR::Opcode::FPCeil16: case IR::Opcode::FPCeil16:
@ -105,6 +113,14 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::FPRoundEven64: case IR::Opcode::FPRoundEven64:
case IR::Opcode::FPSaturate64: case IR::Opcode::FPSaturate64:
case IR::Opcode::FPTrunc64: case IR::Opcode::FPTrunc64:
case IR::Opcode::ConvertF64S8:
case IR::Opcode::ConvertF64S16:
case IR::Opcode::ConvertF64S32:
case IR::Opcode::ConvertF64S64:
case IR::Opcode::ConvertF64U8:
case IR::Opcode::ConvertF64U16:
case IR::Opcode::ConvertF64U32:
case IR::Opcode::ConvertF64U64:
info.uses_fp64 = true; info.uses_fp64 = true;
break; break;
default: default:
@ -123,6 +139,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::WriteStorageU8: case IR::Opcode::WriteStorageU8:
case IR::Opcode::WriteStorageS8: case IR::Opcode::WriteStorageS8:
case IR::Opcode::SelectU8: case IR::Opcode::SelectU8:
case IR::Opcode::ConvertF16S8:
case IR::Opcode::ConvertF16U8:
case IR::Opcode::ConvertF32S8:
case IR::Opcode::ConvertF32U8:
case IR::Opcode::ConvertF64S8:
case IR::Opcode::ConvertF64U8:
info.uses_int8 = true; info.uses_int8 = true;
break; break;
default: default:
@ -149,6 +171,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU16F16:
case IR::Opcode::ConvertU16F32: case IR::Opcode::ConvertU16F32:
case IR::Opcode::ConvertU16F64: case IR::Opcode::ConvertU16F64:
case IR::Opcode::ConvertF16S16:
case IR::Opcode::ConvertF16U16:
case IR::Opcode::ConvertF32S16:
case IR::Opcode::ConvertF32U16:
case IR::Opcode::ConvertF64S16:
case IR::Opcode::ConvertF64U16:
info.uses_int16 = true; info.uses_int16 = true;
break; break;
default: default:

View file

@ -70,6 +70,22 @@ IR::Opcode Replace(IR::Opcode op) {
return IR::Opcode::Identity; return IR::Opcode::Identity;
case IR::Opcode::ConvertF16F32: case IR::Opcode::ConvertF16F32:
return IR::Opcode::Identity; return IR::Opcode::Identity;
case IR::Opcode::ConvertF16S8:
return IR::Opcode::ConvertF32S8;
case IR::Opcode::ConvertF16S16:
return IR::Opcode::ConvertF32S16;
case IR::Opcode::ConvertF16S32:
return IR::Opcode::ConvertF32S32;
case IR::Opcode::ConvertF16S64:
return IR::Opcode::ConvertF32S64;
case IR::Opcode::ConvertF16U8:
return IR::Opcode::ConvertF32U8;
case IR::Opcode::ConvertF16U16:
return IR::Opcode::ConvertF32U16;
case IR::Opcode::ConvertF16U32:
return IR::Opcode::ConvertF32U32;
case IR::Opcode::ConvertF16U64:
return IR::Opcode::ConvertF32U64;
default: default:
return op; return op;
} }

View file

@ -227,6 +227,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {
const auto& float_control{device.FloatControlProperties()}; const auto& float_control{device.FloatControlProperties()};
const VkDriverIdKHR driver_id{device.GetDriverID()};
profile = Shader::Profile{ profile = Shader::Profile{
.unified_descriptor_binding = true, .unified_descriptor_binding = true,
.support_float_controls = true, .support_float_controls = true,
@ -242,7 +243,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
.support_fp32_signed_zero_nan_preserve = .support_fp32_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.has_broken_spirv_clamp = true, // TODO: is_intel .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
}; };
} }