3
0
Fork 0
forked from suyu/suyu

shader: Implement geometry shaders

This commit is contained in:
ReinUsesLisp 2021-04-12 19:41:22 -03:00 committed by ameerj
parent a6cef71cc0
commit f263760c5a
14 changed files with 277 additions and 91 deletions

View file

@ -140,7 +140,27 @@ Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin
return id;
}
u32 NumVertices(InputTopology input_topology) {
switch (input_topology) {
case InputTopology::Points:
return 1;
case InputTopology::Lines:
return 2;
case InputTopology::LinesAdjacency:
return 4;
case InputTopology::Triangles:
return 3;
case InputTopology::TrianglesAdjacency:
return 6;
}
throw InvalidArgument("Invalid input topology {}", input_topology);
}
Id DefineInput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin = std::nullopt) {
if (ctx.stage == Stage::Geometry) {
const u32 num_vertices{NumVertices(ctx.profile.input_topology)};
type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices));
}
return DefineVariable(ctx, type, builtin, spv::StorageClass::Input);
}
@ -455,12 +475,16 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
void EmitContext::DefineAttributeMemAccess(const Info& info) {
const auto make_load{[&] {
const bool is_array{stage == Stage::Geometry};
const Id end_block{OpLabel()};
const Id default_label{OpLabel()};
const Id func_type_load{TypeFunction(F32[1], U32[1])};
const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1])
: TypeFunction(F32[1], U32[1])};
const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)};
const Id offset{OpFunctionParameter(U32[1])};
const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}};
AddLabel();
const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))};
const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))};
@ -472,7 +496,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
labels.push_back(OpLabel());
}
const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
for (u32 i = 0; i < info.input_generics.size(); i++) {
for (u32 i = 0; i < info.input_generics.size(); ++i) {
if (!info.input_generics[i].used) {
continue;
}
@ -486,7 +510,10 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
size_t label_index{0};
if (info.loads_position) {
AddLabel(labels[label_index]);
const Id result{OpLoad(F32[1], OpAccessChain(input_f32, input_position, masked_index))};
const Id pointer{is_array
? OpAccessChain(input_f32, input_position, vertex, masked_index)
: OpAccessChain(input_f32, input_position, masked_index)};
const Id result{OpLoad(F32[1], pointer)};
OpReturnValue(result);
++label_index;
}
@ -502,7 +529,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
continue;
}
const Id generic_id{input_generics.at(i)};
const Id pointer{OpAccessChain(type->pointer, generic_id, masked_index)};
const Id pointer{is_array
? OpAccessChain(type->pointer, generic_id, vertex, masked_index)
: OpAccessChain(type->pointer, generic_id, masked_index)};
const Id value{OpLoad(type->id, pointer)};
const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
OpReturnValue(result);
@ -910,13 +939,13 @@ void EmitContext::DefineOutputs(const Info& info) {
}
if (info.stores_point_size || profile.fixed_state_point_size) {
if (stage == Stage::Fragment) {
throw NotImplementedException("Storing PointSize in Fragment stage");
throw NotImplementedException("Storing PointSize in fragment stage");
}
output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize);
}
if (info.stores_clip_distance) {
if (stage == Stage::Fragment) {
throw NotImplementedException("Storing PointSize in Fragment stage");
throw NotImplementedException("Storing ClipDistance in fragment stage");
}
const Id type{TypeArray(F32[1], Constant(U32[1], 8U))};
clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance);
@ -924,7 +953,7 @@ void EmitContext::DefineOutputs(const Info& info) {
if (info.stores_viewport_index &&
(profile.support_viewport_index_layer_non_geometry || stage == Shader::Stage::Geometry)) {
if (stage == Stage::Fragment) {
throw NotImplementedException("Storing ViewportIndex in Fragment stage");
throw NotImplementedException("Storing ViewportIndex in fragment stage");
}
viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex);
}

View file

@ -134,6 +134,44 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
case Shader::Stage::VertexB:
execution_model = spv::ExecutionModel::Vertex;
break;
case Shader::Stage::Geometry:
execution_model = spv::ExecutionModel::Geometry;
ctx.AddCapability(spv::Capability::Geometry);
ctx.AddCapability(spv::Capability::GeometryStreams);
switch (ctx.profile.input_topology) {
case InputTopology::Points:
ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints);
break;
case InputTopology::Lines:
ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines);
break;
case InputTopology::LinesAdjacency:
ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency);
break;
case InputTopology::Triangles:
ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles);
break;
case InputTopology::TrianglesAdjacency:
ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency);
break;
}
switch (program.output_topology) {
case OutputTopology::PointList:
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints);
break;
case OutputTopology::LineStrip:
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip);
break;
case OutputTopology::TriangleStrip:
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip);
break;
}
if (program.info.stores_point_size) {
ctx.AddCapability(spv::Capability::GeometryPointSize);
}
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices);
ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations);
break;
case Shader::Stage::Fragment:
execution_model = spv::ExecutionModel::Fragment;
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);

View file

@ -34,8 +34,8 @@ void EmitMemoryBarrierDeviceLevel(EmitContext& ctx);
void EmitMemoryBarrierSystemLevel(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx);
void EmitEmitVertex(EmitContext& ctx, Id stream);
void EmitEndPrimitive(EmitContext& ctx, Id stream);
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
void EmitGetRegister(EmitContext& ctx);
void EmitSetRegister(EmitContext& ctx);
void EmitGetPred(EmitContext& ctx);
@ -51,10 +51,10 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value);
Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset);
void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value);
void EmitGetZFlag(EmitContext& ctx);

View file

@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <tuple>
#include <utility>
#include "shader_recompiler/backend/spirv/emit_spirv.h"
@ -29,6 +30,15 @@ std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
throw InvalidArgument("Invalid attribute type {}", type);
}
template <typename... Args>
Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
if (ctx.stage == Stage::Geometry) {
return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...);
} else {
return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...);
}
}
std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
const u32 element{static_cast<u32>(attr) % 4};
const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
@ -66,6 +76,31 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
throw NotImplementedException("Read attribute {}", attr);
}
}
Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
const IR::Value& binding, const IR::Value& offset) {
if (!binding.IsImmediate()) {
throw NotImplementedException("Constant buffer indexing");
}
const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
const Id uniform_type{ctx.uniform_types.*member_ptr};
if (!offset.IsImmediate()) {
Id index{ctx.Def(offset)};
if (element_size > 1) {
const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)};
index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
}
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
return ctx.OpLoad(result_type, access_chain);
}
if (offset.U32() % element_size != 0) {
throw NotImplementedException("Unaligned immediate constant buffer load");
}
const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)};
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
return ctx.OpLoad(result_type, access_chain);
}
} // Anonymous namespace
void EmitGetRegister(EmitContext&) {
@ -100,31 +135,6 @@ void EmitGetIndirectBranchVariable(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr,
u32 element_size, const IR::Value& binding, const IR::Value& offset) {
if (!binding.IsImmediate()) {
throw NotImplementedException("Constant buffer indexing");
}
const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
const Id uniform_type{ctx.uniform_types.*member_ptr};
if (!offset.IsImmediate()) {
Id index{ctx.Def(offset)};
if (element_size > 1) {
const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)};
index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
}
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
return ctx.OpLoad(result_type, access_chain);
}
if (offset.U32() % element_size != 0) {
throw NotImplementedException("Unaligned immediate constant buffer load");
}
const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)};
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
return ctx.OpLoad(result_type, access_chain);
}
Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
return ctx.OpUConvert(ctx.U32[1], load);
@ -157,7 +167,7 @@ Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value&
return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset);
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const u32 element{static_cast<u32>(attr) % 4};
const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
if (IR::IsGeneric(attr)) {
@ -168,7 +178,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
return ctx.Constant(ctx.F32[1], 0.0f);
}
const Id generic_id{ctx.input_generics.at(index)};
const Id pointer{ctx.OpAccessChain(type->pointer, generic_id, element_id())};
const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())};
const Id value{ctx.OpLoad(type->id, pointer)};
return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
}
@ -177,8 +187,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
case IR::Attribute::PositionY:
case IR::Attribute::PositionZ:
case IR::Attribute::PositionW:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()));
return ctx.OpLoad(
ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, element_id()));
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
@ -198,29 +208,32 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
ctx.Constant(ctx.U32[1], std::numeric_limits<u32>::max()),
ctx.u32_zero_value);
case IR::Attribute::PointSpriteS:
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord,
ctx.Constant(ctx.U32[1], 0U)));
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord,
ctx.u32_zero_value));
case IR::Attribute::PointSpriteT:
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord,
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord,
ctx.Constant(ctx.U32[1], 1U)));
default:
throw NotImplementedException("Read attribute {}", attr);
}
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) {
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
const std::optional<Id> output{OutputAttrPointer(ctx, attr)};
if (!output) {
return;
}
if (output) {
ctx.OpStore(*output, value);
}
}
Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset) {
Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) {
if (ctx.stage == Stage::Geometry) {
return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex);
} else {
return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset);
}
}
void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value) {
void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) {
ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);
}

View file

@ -5,6 +5,17 @@
#include "shader_recompiler/backend/spirv/emit_spirv.h"
namespace Shader::Backend::SPIRV {
namespace {
void ConvertDepthMode(EmitContext& ctx) {
const Id type{ctx.F32[1]};
const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)};
const Id z{ctx.OpCompositeExtract(type, position, 2u)};
const Id w{ctx.OpCompositeExtract(type, position, 3u)};
const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))};
const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)};
ctx.OpStore(ctx.output_position, vector);
}
} // Anonymous namespace
void EmitPrologue(EmitContext& ctx) {
if (ctx.stage == Stage::VertexB) {
@ -25,23 +36,30 @@ void EmitPrologue(EmitContext& ctx) {
}
void EmitEpilogue(EmitContext& ctx) {
if (ctx.profile.convert_depth_mode) {
const Id type{ctx.F32[1]};
const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)};
const Id z{ctx.OpCompositeExtract(type, position, 2u)};
const Id w{ctx.OpCompositeExtract(type, position, 3u)};
const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))};
const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)};
ctx.OpStore(ctx.output_position, vector);
if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) {
ConvertDepthMode(ctx);
}
}
void EmitEmitVertex(EmitContext& ctx, Id stream) {
ctx.OpEmitStreamVertex(stream);
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
if (ctx.profile.convert_depth_mode) {
ConvertDepthMode(ctx);
}
if (!stream.IsImmediate()) {
// LOG_WARNING(..., "EmitVertex's stream is not constant");
ctx.OpEmitStreamVertex(ctx.u32_zero_value);
return;
}
ctx.OpEmitStreamVertex(ctx.Def(stream));
}
void EmitEndPrimitive(EmitContext& ctx, Id stream) {
ctx.OpEndStreamPrimitive(stream);
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
if (!stream.IsImmediate()) {
// LOG_WARNING(..., "EndPrimitive's stream is not constant");
ctx.OpEndStreamPrimitive(ctx.u32_zero_value);
return;
}
ctx.OpEndStreamPrimitive(ctx.Def(stream));
}
} // namespace Shader::Backend::SPIRV

View file

@ -308,19 +308,27 @@ U1 IREmitter::GetFlowTestResult(FlowTest test) {
}
F32 IREmitter::GetAttribute(IR::Attribute attribute) {
return Inst<F32>(Opcode::GetAttribute, attribute);
return GetAttribute(attribute, Imm32(0));
}
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
Inst(Opcode::SetAttribute, attribute, value);
F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
}
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
Inst(Opcode::SetAttribute, attribute, value, vertex);
}
F32 IREmitter::GetAttributeIndexed(const U32& phys_address) {
return Inst<F32>(Opcode::GetAttributeIndexed, phys_address);
return GetAttributeIndexed(phys_address, Imm32(0));
}
void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value) {
Inst(Opcode::SetAttributeIndexed, phys_address, value);
F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) {
return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex);
}
void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) {
Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex);
}
void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {

View file

@ -77,10 +77,12 @@ public:
[[nodiscard]] U1 GetFlowTestResult(FlowTest test);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
void SetAttribute(IR::Attribute attribute, const F32& value);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
void SetAttributeIndexed(const U32& phys_address, const F32& value);
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex);
void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);
void SetFragColor(u32 index, u32 component, const F32& value);
void SetFragDepth(const F32& value);

View file

@ -44,10 +44,10 @@ OPCODE(GetCbufS16, U32, U32,
OPCODE(GetCbufU32, U32, U32, U32, )
OPCODE(GetCbufF32, F32, U32, U32, )
OPCODE(GetCbufU32x2, U32x2, U32, U32, )
OPCODE(GetAttribute, F32, Attribute, )
OPCODE(SetAttribute, Void, Attribute, F32, )
OPCODE(GetAttributeIndexed, F32, U32, )
OPCODE(SetAttributeIndexed, Void, U32, F32, )
OPCODE(GetAttribute, F32, Attribute, U32, )
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
OPCODE(GetAttributeIndexed, F32, U32, U32, )
OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
OPCODE(SetFragColor, Void, U32, U32, F32, )
OPCODE(SetFragDepth, Void, F32, )
OPCODE(GetZFlag, U1, Void, )

View file

@ -10,6 +10,7 @@
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/program_header.h"
#include "shader_recompiler/shader_info.h"
#include "shader_recompiler/stage.h"
@ -21,6 +22,9 @@ struct Program {
Info info;
Stage stage{};
std::array<u32, 3> workgroup_size{};
OutputTopology output_topology{};
u32 output_vertices{};
u32 invocations{};
u32 local_memory_size{};
u32 shared_memory_size{};
};

View file

@ -69,9 +69,20 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
program.post_order_blocks = PostOrder(program.blocks);
program.stage = env.ShaderStage();
program.local_memory_size = env.LocalMemorySize();
if (program.stage == Stage::Compute) {
switch (program.stage) {
case Stage::Geometry: {
const ProgramHeader& sph{env.SPH()};
program.output_topology = sph.common3.output_topology;
program.output_vertices = sph.common4.max_output_vertices;
program.invocations = sph.common2.threads_per_input_primitive;
break;
}
case Stage::Compute:
program.workgroup_size = env.WorkgroupSize();
program.shared_memory_size = env.SharedMemorySize();
break;
default:
break;
}
RemoveUnreachableBlocks(program);

View file

@ -64,7 +64,7 @@ void TranslatorVisitor::ALD(u64 insn) {
BitField<8, 8, IR::Reg> index_reg;
BitField<20, 10, u64> absolute_offset;
BitField<20, 11, s64> relative_offset;
BitField<39, 8, IR::Reg> array_reg;
BitField<39, 8, IR::Reg> vertex_reg;
BitField<32, 1, u64> o;
BitField<31, 1, u64> patch;
BitField<47, 2, Size> size;
@ -80,15 +80,17 @@ void TranslatorVisitor::ALD(u64 insn) {
if (offset % 4 != 0) {
throw NotImplementedException("Unaligned absolute offset {}", offset);
}
const IR::U32 vertex{X(ald.vertex_reg)};
const u32 num_elements{NumElements(ald.size)};
if (ald.index_reg == IR::Reg::RZ) {
for (u32 element = 0; element < num_elements; ++element) {
F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element}));
const IR::Attribute attr{offset / 4 + element};
F(ald.dest_reg + element, ir.GetAttribute(attr, vertex));
}
return;
}
HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset));
F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex));
});
}
@ -100,7 +102,7 @@ void TranslatorVisitor::AST(u64 insn) {
BitField<20, 10, u64> absolute_offset;
BitField<20, 11, s64> relative_offset;
BitField<31, 1, u64> patch;
BitField<39, 8, IR::Reg> array_reg;
BitField<39, 8, IR::Reg> vertex_reg;
BitField<47, 2, Size> size;
} const ast{insn};
@ -114,15 +116,17 @@ void TranslatorVisitor::AST(u64 insn) {
if (offset % 4 != 0) {
throw NotImplementedException("Unaligned absolute offset {}", offset);
}
const IR::U32 vertex{X(ast.vertex_reg)};
const u32 num_elements{NumElements(ast.size)};
if (ast.index_reg == IR::Reg::RZ) {
for (u32 element = 0; element < num_elements; ++element) {
ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element));
const IR::Attribute attr{offset / 4 + element};
ir.SetAttribute(attr, F(ast.src_reg + element), vertex);
}
return;
}
HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element));
ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex);
});
}

View file

@ -18,6 +18,14 @@ enum class AttributeType : u8 {
Disabled,
};
enum class InputTopology {
Points,
Lines,
LinesAdjacency,
Triangles,
TrianglesAdjacency,
};
struct Profile {
u32 supported_spirv{0x00010000};
@ -46,6 +54,8 @@ struct Profile {
std::array<AttributeType, 32> generic_input_types{};
bool convert_depth_mode{};
InputTopology input_topology{};
std::optional<float> fixed_state_point_size;
};

View file

@ -769,7 +769,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const size_t stage_index{index - 1};
infos[stage_index] = &program.info;
const Shader::Profile profile{MakeProfile(key, program.stage)};
const Shader::Profile profile{MakeProfile(key, program)};
const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
device.SaveShader(code);
modules[stage_index] = BuildShader(device, code);
@ -880,15 +880,59 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA
}
Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
Shader::Stage stage) {
const Shader::IR::Program& program) {
Shader::Profile profile{base_profile};
if (stage == Shader::Stage::VertexB) {
profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0;
const Shader::Stage stage{program.stage};
const bool has_geometry{key.unique_hashes[4] != u128{}};
const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
const float point_size{Common::BitCast<float>(key.state.point_size)};
switch (stage) {
case Shader::Stage::VertexB:
if (!has_geometry) {
if (key.state.topology == Maxwell::PrimitiveTopology::Points) {
profile.fixed_state_point_size = Common::BitCast<float>(key.state.point_size);
profile.fixed_state_point_size = point_size;
}
profile.convert_depth_mode = gl_ndc;
}
std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(),
&CastAttributeType);
break;
case Shader::Stage::Geometry:
if (program.output_topology == Shader::OutputTopology::PointList) {
profile.fixed_state_point_size = point_size;
}
profile.convert_depth_mode = gl_ndc;
break;
default:
break;
}
switch (key.state.topology) {
case Maxwell::PrimitiveTopology::Points:
profile.input_topology = Shader::InputTopology::Points;
break;
case Maxwell::PrimitiveTopology::Lines:
case Maxwell::PrimitiveTopology::LineLoop:
case Maxwell::PrimitiveTopology::LineStrip:
profile.input_topology = Shader::InputTopology::Lines;
break;
case Maxwell::PrimitiveTopology::Triangles:
case Maxwell::PrimitiveTopology::TriangleStrip:
case Maxwell::PrimitiveTopology::TriangleFan:
case Maxwell::PrimitiveTopology::Quads:
case Maxwell::PrimitiveTopology::QuadStrip:
case Maxwell::PrimitiveTopology::Polygon:
case Maxwell::PrimitiveTopology::Patches:
profile.input_topology = Shader::InputTopology::Triangles;
break;
case Maxwell::PrimitiveTopology::LinesAdjacency:
case Maxwell::PrimitiveTopology::LineStripAdjacency:
profile.input_topology = Shader::InputTopology::LinesAdjacency;
break;
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
profile.input_topology = Shader::InputTopology::TrianglesAdjacency;
break;
}
return profile;
}

View file

@ -33,6 +33,10 @@ namespace Core {
class System;
}
namespace Shader::IR {
struct Program;
}
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@ -160,7 +164,8 @@ private:
Shader::Environment& env,
bool build_in_parallel);
Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage);
Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key,
const Shader::IR::Program& program);
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;