forked from suyu/suyu
shader: Refactor PTP and other minor changes
This commit is contained in:
parent
b5db38f50e
commit
d9c5bd9509
14 changed files with 67 additions and 123 deletions
|
@ -169,7 +169,6 @@ void EmitContext::DefineCommonTypes(const Info& info) {
|
||||||
AddCapability(spv::Capability::Float64);
|
AddCapability(spv::Capability::Float64);
|
||||||
F64.Define(*this, TypeFloat(64), "f64");
|
F64.Define(*this, TypeFloat(64), "f64");
|
||||||
}
|
}
|
||||||
array_U32x2 = Name(TypeArray(U32[2], Constant(U32[1], 4U)), "array-u32x2");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineCommonConstants() {
|
void EmitContext::DefineCommonConstants() {
|
||||||
|
@ -352,20 +351,19 @@ void EmitContext::DefineOutputs(const Info& info) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (stage == Stage::Fragment) {
|
if (stage == Stage::Fragment) {
|
||||||
for (size_t i = 0; i < 8; ++i) {
|
for (u32 index = 0; index < 8; ++index) {
|
||||||
if (!info.stores_frag_color[i]) {
|
if (!info.stores_frag_color[index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
frag_color[i] = DefineOutput(*this, F32[4]);
|
frag_color[index] = DefineOutput(*this, F32[4]);
|
||||||
Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i));
|
Decorate(frag_color[index], spv::Decoration::Location, index);
|
||||||
Name(frag_color[i], fmt::format("frag_color{}", i));
|
Name(frag_color[index], fmt::format("frag_color{}", index));
|
||||||
}
|
}
|
||||||
if (!info.stores_frag_depth) {
|
if (info.stores_frag_depth) {
|
||||||
return;
|
frag_depth = DefineOutput(*this, F32[1]);
|
||||||
|
Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
|
||||||
|
Name(frag_depth, "frag_depth");
|
||||||
}
|
}
|
||||||
frag_depth = DefineOutput(*this, F32[1]);
|
|
||||||
Decorate(frag_depth, spv::Decoration::BuiltIn, static_cast<u32>(spv::BuiltIn::FragDepth));
|
|
||||||
Name(frag_depth, "frag_depth");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -65,7 +65,6 @@ public:
|
||||||
VectorTypes U32;
|
VectorTypes U32;
|
||||||
VectorTypes F16;
|
VectorTypes F16;
|
||||||
VectorTypes F64;
|
VectorTypes F64;
|
||||||
Id array_U32x2;
|
|
||||||
|
|
||||||
Id true_value{};
|
Id true_value{};
|
||||||
Id false_value{};
|
Id false_value{};
|
||||||
|
|
|
@ -95,7 +95,7 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
|
||||||
Id value);
|
Id value);
|
||||||
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
Id value);
|
Id value);
|
||||||
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
|
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
|
||||||
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
||||||
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
||||||
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
|
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
|
||||||
|
@ -104,7 +104,7 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
|
||||||
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
|
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
|
||||||
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
||||||
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
||||||
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
|
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
|
||||||
|
@ -113,7 +113,7 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
|
||||||
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
|
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
|
||||||
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
||||||
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
||||||
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
|
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
|
||||||
|
@ -122,7 +122,6 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
|
||||||
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
|
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
|
||||||
Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
|
|
||||||
void EmitCompositeConstructF64x2(EmitContext& ctx);
|
void EmitCompositeConstructF64x2(EmitContext& ctx);
|
||||||
void EmitCompositeConstructF64x3(EmitContext& ctx);
|
void EmitCompositeConstructF64x3(EmitContext& ctx);
|
||||||
void EmitCompositeConstructF64x4(EmitContext& ctx);
|
void EmitCompositeConstructF64x4(EmitContext& ctx);
|
||||||
|
@ -359,10 +358,10 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
|
||||||
Id coords, Id dref, Id bias_lc, Id offset);
|
Id coords, Id dref, Id bias_lc, Id offset);
|
||||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||||
Id coords, Id dref, Id lod_lc, Id offset);
|
Id coords, Id dref, Id lod_lc, Id offset);
|
||||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
Id offset2);
|
const IR::Value& offset, const IR::Value& offset2);
|
||||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
Id offset, Id offset2, Id dref);
|
const IR::Value& offset, const IR::Value& offset2, Id dref);
|
||||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
||||||
Id lod, Id ms);
|
Id lod, Id ms);
|
||||||
Id EmitVoteAll(EmitContext& ctx, Id pred);
|
Id EmitVoteAll(EmitContext& ctx, Id pred);
|
||||||
|
|
|
@ -7,11 +7,7 @@
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
|
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
|
||||||
const auto info{inst->Flags<IR::CompositeDecoration>()};
|
|
||||||
if (info.is_constant) {
|
|
||||||
return ctx.ConstantComposite(ctx.U32[2], e1, e2);
|
|
||||||
}
|
|
||||||
return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
|
return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,12 +43,7 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index
|
||||||
return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
|
return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
|
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
|
||||||
|
|
||||||
const auto info{inst->Flags<IR::CompositeDecoration>()};
|
|
||||||
if (info.is_constant) {
|
|
||||||
return ctx.ConstantComposite(ctx.F16[2], e1, e2);
|
|
||||||
}
|
|
||||||
return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
|
return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,11 +79,7 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index
|
||||||
return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
|
return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
|
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
|
||||||
const auto info{inst->Flags<IR::CompositeDecoration>()};
|
|
||||||
if (info.is_constant) {
|
|
||||||
return ctx.ConstantComposite(ctx.F32[2], e1, e2);
|
|
||||||
}
|
|
||||||
return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
|
return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -164,15 +151,4 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index
|
||||||
return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
|
return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
|
|
||||||
const auto info{inst->Flags<IR::CompositeDecoration>()};
|
|
||||||
if (info.is_constant) {
|
|
||||||
return ctx.ConstantComposite(ctx.array_U32x2, e1, e2, e3, e4);
|
|
||||||
}
|
|
||||||
if (ctx.profile.support_variadic_ptp) {
|
|
||||||
return ctx.OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4);
|
|
||||||
}
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -30,16 +30,34 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id offset2) {
|
explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) {
|
||||||
if (Sirit::ValidId(offset)) {
|
if (offset2.IsEmpty()) {
|
||||||
Add(spv::ImageOperandsMask::Offset, offset);
|
if (offset.IsEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if (Sirit::ValidId(offset2)) {
|
const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
|
||||||
Add(spv::ImageOperandsMask::ConstOffsets, offset2);
|
if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
|
||||||
|
throw NotImplementedException("Not all arguments in PTP are immediate");
|
||||||
}
|
}
|
||||||
|
const IR::Opcode opcode{values[0]->Opcode()};
|
||||||
|
if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
|
||||||
|
throw LogicError("Invalid PTP arguments");
|
||||||
|
}
|
||||||
|
auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }};
|
||||||
|
|
||||||
|
const Id offsets{
|
||||||
|
ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)),
|
||||||
|
ctx.ConstantComposite(ctx.U32[2], read(0, 0), read(0, 1)),
|
||||||
|
ctx.ConstantComposite(ctx.U32[2], read(0, 2), read(0, 3)),
|
||||||
|
ctx.ConstantComposite(ctx.U32[2], read(1, 0), read(1, 1)),
|
||||||
|
ctx.ConstantComposite(ctx.U32[2], read(1, 2), read(1, 3)))};
|
||||||
|
Add(spv::ImageOperandsMask::ConstOffsets, offsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id lod, Id ms) {
|
explicit ImageOperands(Id offset, Id lod, Id ms) {
|
||||||
if (Sirit::ValidId(lod)) {
|
if (Sirit::ValidId(lod)) {
|
||||||
Add(spv::ImageOperandsMask::Lod, lod);
|
Add(spv::ImageOperandsMask::Lod, lod);
|
||||||
}
|
}
|
||||||
|
@ -197,8 +215,8 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
|
||||||
Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
|
Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
Id offset2) {
|
const IR::Value& offset, const IR::Value& offset2) {
|
||||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||||
const ImageOperands operands(ctx, offset, offset2);
|
const ImageOperands operands(ctx, offset, offset2);
|
||||||
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
|
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
|
||||||
|
@ -208,7 +226,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
Id offset, Id offset2, Id dref) {
|
const IR::Value& offset, const IR::Value& offset2, Id dref) {
|
||||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||||
const ImageOperands operands(ctx, offset, offset2);
|
const ImageOperands operands(ctx, offset, offset2);
|
||||||
return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
|
return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
|
||||||
|
@ -218,7 +236,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
||||||
Id lod, Id ms) {
|
Id lod, Id ms) {
|
||||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||||
const ImageOperands operands(ctx, offset, lod, ms);
|
const ImageOperands operands(offset, lod, ms);
|
||||||
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
|
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
|
||||||
Texture(ctx, index), coords, operands.Mask(), operands.Span());
|
Texture(ctx, index), coords, operands.Mask(), operands.Span());
|
||||||
}
|
}
|
||||||
|
|
|
@ -398,16 +398,15 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
|
||||||
if (e1.Type() != e2.Type()) {
|
if (e1.Type() != e2.Type()) {
|
||||||
throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
|
throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
|
||||||
}
|
}
|
||||||
CompositeDecoration decor{};
|
|
||||||
switch (e1.Type()) {
|
switch (e1.Type()) {
|
||||||
case Type::U32:
|
case Type::U32:
|
||||||
return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2);
|
return Inst(Opcode::CompositeConstructU32x2, e1, e2);
|
||||||
case Type::F16:
|
case Type::F16:
|
||||||
return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2);
|
return Inst(Opcode::CompositeConstructF16x2, e1, e2);
|
||||||
case Type::F32:
|
case Type::F32:
|
||||||
return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2);
|
return Inst(Opcode::CompositeConstructF32x2, e1, e2);
|
||||||
case Type::F64:
|
case Type::F64:
|
||||||
return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2);
|
return Inst(Opcode::CompositeConstructF64x2, e1, e2);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(e1.Type());
|
ThrowInvalidType(e1.Type());
|
||||||
}
|
}
|
||||||
|
@ -437,7 +436,6 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
|
||||||
throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
|
throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
|
||||||
e3.Type(), e4.Type());
|
e3.Type(), e4.Type());
|
||||||
}
|
}
|
||||||
CompositeDecoration decor{};
|
|
||||||
switch (e1.Type()) {
|
switch (e1.Type()) {
|
||||||
case Type::U32:
|
case Type::U32:
|
||||||
return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
|
return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
|
||||||
|
@ -447,8 +445,6 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
|
||||||
return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
|
return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
|
||||||
case Type::F64:
|
case Type::F64:
|
||||||
return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
|
return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
|
||||||
case Type::U32x2:
|
|
||||||
return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4);
|
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(e1.Type());
|
ThrowInvalidType(e1.Type());
|
||||||
}
|
}
|
||||||
|
|
|
@ -101,8 +101,8 @@ public:
|
||||||
|
|
||||||
template <typename FlagsType>
|
template <typename FlagsType>
|
||||||
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
|
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
|
||||||
[[nodiscard]] void SetFlags(FlagsType& new_val) noexcept {
|
[[nodiscard]] void SetFlags(FlagsType value) noexcept {
|
||||||
std::memcpy(&flags, &new_val, sizeof(new_val));
|
std::memcpy(&flags, &value, sizeof(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Intrusively store the host definition of this instruction.
|
/// Intrusively store the host definition of this instruction.
|
||||||
|
|
|
@ -32,11 +32,6 @@ struct FpControl {
|
||||||
};
|
};
|
||||||
static_assert(sizeof(FpControl) <= sizeof(u32));
|
static_assert(sizeof(FpControl) <= sizeof(u32));
|
||||||
|
|
||||||
struct CompositeDecoration {
|
|
||||||
bool is_constant{false};
|
|
||||||
};
|
|
||||||
static_assert(sizeof(CompositeDecoration) <= sizeof(u32));
|
|
||||||
|
|
||||||
union TextureInstInfo {
|
union TextureInstInfo {
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 8, TextureType> type;
|
BitField<0, 8, TextureType> type;
|
||||||
|
|
|
@ -126,7 +126,6 @@ OPCODE(CompositeExtractF64x4, F64, F64x
|
||||||
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
|
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
|
||||||
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
|
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
|
||||||
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
|
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
|
||||||
OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, )
|
|
||||||
|
|
||||||
// Select operations
|
// Select operations
|
||||||
OPCODE(SelectU1, U1, U1, U1, U1, )
|
OPCODE(SelectU1, U1, U1, U1, U1, )
|
||||||
|
|
|
@ -44,20 +44,6 @@ bool Value::IsEmpty() const noexcept {
|
||||||
return type == Type::Void;
|
return type == Type::Void;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Value::IsConstantContainer() const {
|
|
||||||
if (IsImmediate()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
ValidateAccess(Type::Opaque);
|
|
||||||
auto num_args = inst->NumArgs();
|
|
||||||
for (size_t i = 0; i < num_args; i++) {
|
|
||||||
if (!inst->Arg(i).IsConstantContainer()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Value::IsImmediate() const noexcept {
|
bool Value::IsImmediate() const noexcept {
|
||||||
if (IsIdentity()) {
|
if (IsIdentity()) {
|
||||||
return inst->Arg(0).IsImmediate();
|
return inst->Arg(0).IsImmediate();
|
||||||
|
|
|
@ -38,7 +38,6 @@ public:
|
||||||
[[nodiscard]] bool IsImmediate() const noexcept;
|
[[nodiscard]] bool IsImmediate() const noexcept;
|
||||||
[[nodiscard]] bool IsLabel() const noexcept;
|
[[nodiscard]] bool IsLabel() const noexcept;
|
||||||
[[nodiscard]] IR::Type Type() const noexcept;
|
[[nodiscard]] IR::Type Type() const noexcept;
|
||||||
[[nodiscard]] bool IsConstantContainer() const;
|
|
||||||
|
|
||||||
[[nodiscard]] IR::Inst* Inst() const;
|
[[nodiscard]] IR::Inst* Inst() const;
|
||||||
[[nodiscard]] IR::Block* Label() const;
|
[[nodiscard]] IR::Block* Label() const;
|
||||||
|
|
|
@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
|
||||||
throw NotImplementedException("Invalid texture type {}", type);
|
throw NotImplementedException("Invalid texture type {}", type);
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
|
std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
|
||||||
const IR::U32 value1{v.X(reg++)};
|
const IR::U32 value1{v.X(reg++)};
|
||||||
const IR::U32 value2{v.X(reg++)};
|
const IR::U32 value2{v.X(reg++)};
|
||||||
const IR::U32 bitsize = v.ir.Imm32(6);
|
const IR::U32 bitsize{v.ir.Imm32(6)};
|
||||||
const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) {
|
const auto make_vector{[&v, &bitsize](const IR::U32& value) {
|
||||||
return v.ir.CompositeConstruct(
|
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
|
||||||
v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true),
|
v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
|
||||||
v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true));
|
v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
|
||||||
});
|
v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
|
||||||
return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16),
|
}};
|
||||||
getVector(value2, 0), getVector(value2, 16));
|
return {make_vector(value1), make_vector(value2)};
|
||||||
}
|
}
|
||||||
|
|
||||||
void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
|
void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
|
||||||
|
@ -150,14 +150,12 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy
|
||||||
switch (offset_type) {
|
switch (offset_type) {
|
||||||
case OffsetType::None:
|
case OffsetType::None:
|
||||||
break;
|
break;
|
||||||
case OffsetType::AOFFI: {
|
case OffsetType::AOFFI:
|
||||||
offset = MakeOffset(v, meta_reg, tld4.type);
|
offset = MakeOffset(v, meta_reg, tld4.type);
|
||||||
break;
|
break;
|
||||||
}
|
case OffsetType::PTP:
|
||||||
case OffsetType::PTP: {
|
std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
|
||||||
offset2 = MakeOffsetPTP(v, meta_reg);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Invalid offset type {}", offset_type);
|
throw NotImplementedException("Invalid offset type {}", offset_type);
|
||||||
}
|
}
|
||||||
|
@ -167,7 +165,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy
|
||||||
IR::TextureInstInfo info{};
|
IR::TextureInstInfo info{};
|
||||||
info.type.Assign(GetType(tld4.type, tld4.dc != 0));
|
info.type.Assign(GetType(tld4.type, tld4.dc != 0));
|
||||||
info.gather_component.Assign(static_cast<u32>(component_type));
|
info.gather_component.Assign(static_cast<u32>(component_type));
|
||||||
const IR::Value sample{[&]() -> IR::Value {
|
const IR::Value sample{[&] {
|
||||||
if (tld4.dc == 0) {
|
if (tld4.dc == 0) {
|
||||||
return v.ir.ImageGather(handle, coords, offset, offset2, info);
|
return v.ir.ImageGather(handle, coords, offset, offset2, info);
|
||||||
}
|
}
|
||||||
|
|
|
@ -355,17 +355,6 @@ void FoldBranchConditional(IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) {
|
|
||||||
for (size_t i = 0; i < amount; i++) {
|
|
||||||
if (!inst.Arg(i).IsConstantContainer()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
auto info{inst.Flags<IR::CompositeDecoration>()};
|
|
||||||
info.is_constant = true;
|
|
||||||
inst.SetFlags(info);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.Opcode()) {
|
||||||
case IR::Opcode::GetRegister:
|
case IR::Opcode::GetRegister:
|
||||||
|
@ -391,13 +380,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||||
case IR::Opcode::SelectF32:
|
case IR::Opcode::SelectF32:
|
||||||
case IR::Opcode::SelectF64:
|
case IR::Opcode::SelectF64:
|
||||||
return FoldSelect(inst);
|
return FoldSelect(inst);
|
||||||
case IR::Opcode::CompositeConstructU32x2:
|
|
||||||
case IR::Opcode::CompositeConstructF16x2:
|
|
||||||
case IR::Opcode::CompositeConstructF32x2:
|
|
||||||
case IR::Opcode::CompositeConstructF64x2:
|
|
||||||
return FoldConstantComposite(inst, 2);
|
|
||||||
case IR::Opcode::CompositeConstructArrayU32x2:
|
|
||||||
return FoldConstantComposite(inst, 4);
|
|
||||||
case IR::Opcode::FPMul32:
|
case IR::Opcode::FPMul32:
|
||||||
return FoldFPMul32(inst);
|
return FoldFPMul32(inst);
|
||||||
case IR::Opcode::LogicalAnd:
|
case IR::Opcode::LogicalAnd:
|
||||||
|
@ -423,12 +405,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||||
return;
|
return;
|
||||||
case IR::Opcode::BitFieldSExtract:
|
case IR::Opcode::BitFieldSExtract:
|
||||||
FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
|
FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
|
||||||
const size_t back_shift = static_cast<size_t>(shift) + static_cast<size_t>(count);
|
const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
|
||||||
if (back_shift > Common::BitSize<s32>()) {
|
if (back_shift > Common::BitSize<s32>()) {
|
||||||
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
|
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
|
||||||
base, shift, count);
|
base, shift, count);
|
||||||
}
|
}
|
||||||
const size_t left_shift = Common::BitSize<s32>() - back_shift;
|
const size_t left_shift{Common::BitSize<s32>() - back_shift};
|
||||||
return static_cast<u32>(static_cast<s32>(base << left_shift) >>
|
return static_cast<u32>(static_cast<s32>(base << left_shift) >>
|
||||||
static_cast<size_t>(Common::BitSize<s32>() - count));
|
static_cast<size_t>(Common::BitSize<s32>() - count));
|
||||||
});
|
});
|
||||||
|
|
|
@ -30,7 +30,6 @@ struct Profile {
|
||||||
bool support_fp32_signed_zero_nan_preserve{};
|
bool support_fp32_signed_zero_nan_preserve{};
|
||||||
bool support_fp64_signed_zero_nan_preserve{};
|
bool support_fp64_signed_zero_nan_preserve{};
|
||||||
bool support_vote{};
|
bool support_vote{};
|
||||||
bool support_variadic_ptp{};
|
|
||||||
bool warp_size_potentially_larger_than_guest{};
|
bool warp_size_potentially_larger_than_guest{};
|
||||||
|
|
||||||
// FClamp is broken and OpFMax + OpFMin should be used instead
|
// FClamp is broken and OpFMax + OpFMin should be used instead
|
||||||
|
|
Loading…
Reference in a new issue