glasm: Use storage buffers instead of global memory when possible
This commit is contained in:
parent
f58f79c85d
commit
adb591a757
17 changed files with 503 additions and 437 deletions
|
@ -4,7 +4,6 @@ add_library(shader_recompiler STATIC
|
||||||
backend/glasm/emit_context.h
|
backend/glasm/emit_context.h
|
||||||
backend/glasm/emit_glasm.cpp
|
backend/glasm/emit_glasm.cpp
|
||||||
backend/glasm/emit_glasm.h
|
backend/glasm/emit_glasm.h
|
||||||
backend/glasm/emit_glasm_atomic.cpp
|
|
||||||
backend/glasm/emit_glasm_barriers.cpp
|
backend/glasm/emit_glasm_barriers.cpp
|
||||||
backend/glasm/emit_glasm_bitwise_conversion.cpp
|
backend/glasm/emit_glasm_bitwise_conversion.cpp
|
||||||
backend/glasm/emit_glasm_composite.cpp
|
backend/glasm/emit_glasm_composite.cpp
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "shader_recompiler/backend/bindings.h"
|
#include "shader_recompiler/backend/bindings.h"
|
||||||
#include "shader_recompiler/backend/glasm/emit_context.h"
|
#include "shader_recompiler/backend/glasm/emit_context.h"
|
||||||
#include "shader_recompiler/frontend/ir/program.h"
|
#include "shader_recompiler/frontend/ir/program.h"
|
||||||
|
#include "shader_recompiler/profile.h"
|
||||||
|
|
||||||
namespace Shader::Backend::GLASM {
|
namespace Shader::Backend::GLASM {
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -40,14 +41,22 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
||||||
Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
|
Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
|
||||||
++cbuf_index;
|
++cbuf_index;
|
||||||
}
|
}
|
||||||
|
u32 ssbo_index{};
|
||||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||||
if (desc.count != 1) {
|
if (desc.count != 1) {
|
||||||
throw NotImplementedException("Storage buffer descriptor array");
|
throw NotImplementedException("Storage buffer descriptor array");
|
||||||
}
|
}
|
||||||
|
if (runtime_info.glasm_use_storage_buffers) {
|
||||||
|
Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
|
||||||
|
++bindings.storage_buffer;
|
||||||
|
++ssbo_index;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if (!runtime_info.glasm_use_storage_buffers) {
|
||||||
if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
|
if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
|
||||||
Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
|
Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
stage = program.stage;
|
stage = program.stage;
|
||||||
switch (program.stage) {
|
switch (program.stage) {
|
||||||
case Stage::VertexA:
|
case Stage::VertexA:
|
||||||
|
|
|
@ -15,9 +15,10 @@ namespace Shader::Backend::GLASM {
|
||||||
[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
|
[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||||
IR::Program& program, Bindings& bindings);
|
IR::Program& program, Bindings& bindings);
|
||||||
|
|
||||||
[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) {
|
[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||||
|
IR::Program& program) {
|
||||||
Bindings binding;
|
Bindings binding;
|
||||||
return EmitGLASM(profile, {}, program, binding);
|
return EmitGLASM(profile, runtime_info, program, binding);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::GLASM
|
} // namespace Shader::Backend::GLASM
|
||||||
|
|
|
@ -1,351 +0,0 @@
|
||||||
// Copyright 2021 yuzu Emulator Project
|
|
||||||
// Licensed under GPLv2 or any later version
|
|
||||||
// Refer to the license.txt file included.
|
|
||||||
|
|
||||||
#include "shader_recompiler/backend/glasm/emit_context.h"
|
|
||||||
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
|
|
||||||
#include "shader_recompiler/frontend/ir/value.h"
|
|
||||||
|
|
||||||
namespace Shader::Backend::GLASM {
|
|
||||||
namespace {
|
|
||||||
void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
|
|
||||||
std::string_view then_expr, std::string_view else_expr = {}) {
|
|
||||||
// Operate on bindless SSBO, call the expression with bounds checking
|
|
||||||
// address = c[binding].xy
|
|
||||||
// length = c[binding].z
|
|
||||||
const u32 sb_binding{binding.U32()};
|
|
||||||
ctx.Add("PK64.U DC,c[{}];" // pointer = address
|
|
||||||
"CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset)
|
|
||||||
"ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset
|
|
||||||
"SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
|
|
||||||
sb_binding, offset, offset, sb_binding);
|
|
||||||
if (else_expr.empty()) {
|
|
||||||
ctx.Add("IF NE.x;{}ENDIF;", then_expr);
|
|
||||||
} else {
|
|
||||||
ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename ValueType>
|
|
||||||
void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
|
|
||||||
ValueType value, std::string_view operation, std::string_view size) {
|
|
||||||
const Register ret{ctx.reg_alloc.Define(inst)};
|
|
||||||
StorageOp(ctx, binding, offset,
|
|
||||||
fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
|
|
||||||
}
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarS32 value) {
|
|
||||||
ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarS32 value) {
|
|
||||||
ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
ScalarU32 value) {
|
|
||||||
ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
|
||||||
Register value) {
|
|
||||||
ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "ADD", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarS32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MIN", "S32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MIN", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarS32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MAX", "S32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MAX", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "AND", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "OR", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "XOR", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarU32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "ADD", "U64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MIN", "S64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MIN", "U64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MAX", "S64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MAX", "U64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "AND", "U64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "OR", "U64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "XOR", "U64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, ScalarF32 value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "ADD", "F32");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
|
||||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
|
||||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|
||||||
ScalarU32 offset, Register value) {
|
|
||||||
Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
|
||||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicIAdd32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicSMin32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicUMin32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicSMax32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicUMax32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicInc32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicDec32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicAnd32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicOr32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicXor32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicExchange32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicIAdd64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicSMin64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicUMin64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicSMax64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicUMax64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicInc64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicDec64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicAnd64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicOr64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicXor64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicExchange64(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicAddF32(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicAddF16x2(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicAddF32x2(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicMinF16x2(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicMinF32x2(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicMaxF16x2(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGlobalAtomicMaxF32x2(EmitContext&) {
|
|
||||||
throw NotImplementedException("GLASM instruction");
|
|
||||||
}
|
|
||||||
} // namespace Shader::Backend::GLASM
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
|
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
|
||||||
#include "shader_recompiler/frontend/ir/program.h"
|
#include "shader_recompiler/frontend/ir/program.h"
|
||||||
#include "shader_recompiler/frontend/ir/value.h"
|
#include "shader_recompiler/frontend/ir/value.h"
|
||||||
|
#include "shader_recompiler/profile.h"
|
||||||
|
|
||||||
namespace Shader::Backend::GLASM {
|
namespace Shader::Backend::GLASM {
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -29,7 +30,7 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_expr,
|
void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
|
||||||
std::string_view else_expr = {}) {
|
std::string_view else_expr = {}) {
|
||||||
const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
|
const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
|
||||||
for (size_t index = 0; index < num_buffers; ++index) {
|
for (size_t index = 0; index < num_buffers; ++index) {
|
||||||
|
@ -45,13 +46,21 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e
|
||||||
"SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 1
|
"SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 1
|
||||||
"AND.U.CC RC.x,RC.x,RC.y;"
|
"AND.U.CC RC.x,RC.x,RC.y;"
|
||||||
"IF NE.x;" // a && b
|
"IF NE.x;" // a && b
|
||||||
"SUB.U64 DC.x,{}.x,DC.x;" // offset = input_addr - ssbo_addr
|
"SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
|
||||||
"PK64.U DC.y,c[{}];" // host_ssbo = cbuf
|
ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
|
||||||
|
address, address);
|
||||||
|
if (pointer_based) {
|
||||||
|
ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
|
||||||
"ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
|
"ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
|
||||||
"{}"
|
"{}"
|
||||||
"ELSE;",
|
"ELSE;",
|
||||||
ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
|
index, expr);
|
||||||
address, address, index, then_expr);
|
} else {
|
||||||
|
ctx.Add("CVT.U32.U64 RC.x,DC.x;"
|
||||||
|
"{},ssbo{}[RC.x];"
|
||||||
|
"ELSE;",
|
||||||
|
expr, index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!else_expr.empty()) {
|
if (!else_expr.empty()) {
|
||||||
ctx.Add("{}", else_expr);
|
ctx.Add("{}", else_expr);
|
||||||
|
@ -64,26 +73,55 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
|
void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
|
||||||
std::string_view size) {
|
std::string_view size) {
|
||||||
|
if (ctx.runtime_info.glasm_use_storage_buffers) {
|
||||||
|
ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
|
||||||
|
} else {
|
||||||
StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
|
StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
|
void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
|
||||||
std::string_view size) {
|
std::string_view size) {
|
||||||
const Register ret{ctx.reg_alloc.Define(inst)};
|
const Register ret{ctx.reg_alloc.Define(inst)};
|
||||||
|
if (ctx.runtime_info.glasm_use_storage_buffers) {
|
||||||
|
ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
|
||||||
|
} else {
|
||||||
StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
|
StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
|
||||||
fmt::format("MOV.U {},{{0,0,0,0}};", ret));
|
fmt::format("MOV.U {},{{0,0,0,0}};", ret));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
|
void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
|
||||||
GlobalStorageOp(ctx, address, fmt::format("STORE.{} {},DC.x;", size, value));
|
if (ctx.runtime_info.glasm_use_storage_buffers) {
|
||||||
|
GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
|
||||||
|
} else {
|
||||||
|
GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
|
void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
|
||||||
const Register ret{ctx.reg_alloc.Define(inst)};
|
const Register ret{ctx.reg_alloc.Define(inst)};
|
||||||
GlobalStorageOp(ctx, address, fmt::format("LOAD.{} {},DC.x;", size, ret),
|
if (ctx.runtime_info.glasm_use_storage_buffers) {
|
||||||
|
GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
|
||||||
|
} else {
|
||||||
|
GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
|
||||||
fmt::format("MOV.S {},0;", ret));
|
fmt::format("MOV.S {},0;", ret));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename ValueType>
|
||||||
|
void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
|
||||||
|
ValueType value, std::string_view operation, std::string_view size) {
|
||||||
|
const Register ret{ctx.reg_alloc.Define(inst)};
|
||||||
|
if (ctx.runtime_info.glasm_use_storage_buffers) {
|
||||||
|
ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
|
||||||
|
offset);
|
||||||
|
} else {
|
||||||
|
StorageOp(ctx, binding, offset,
|
||||||
|
fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
|
||||||
|
}
|
||||||
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
|
void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
|
||||||
|
@ -212,4 +250,318 @@ void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 o
|
||||||
Write(ctx, binding, offset, value, "U32X4");
|
Write(ctx, binding, offset, value, "U32X4");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarS32 value) {
|
||||||
|
ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarS32 value) {
|
||||||
|
ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
ScalarU32 value) {
|
||||||
|
ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
|
||||||
|
Register value) {
|
||||||
|
ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "ADD", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarS32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "MIN", "S32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "MIN", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarS32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "MAX", "S32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "MAX", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "AND", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "OR", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "XOR", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, ScalarU32 value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
ScalarU32 offset, Register value) {
|
||||||
|
Atom(ctx, inst, binding, offset, value, "ADD", "U64");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "MIN" and "S64".
void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "MIN", "S64");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "MIN" and "U64".
void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "MIN", "U64");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "MAX" and "S64".
void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "MAX", "S64");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "MAX" and "U64".
void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "MAX", "U64");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "AND" and "U64".
void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                            ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "AND", "U64");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "OR" and "U64".
void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                           ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "OR", "U64");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "XOR" and "U64".
void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                            ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "XOR", "U64");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "EXCH" and "U64".
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                 ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "ADD" and "F32".
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             ScalarU32 offset, ScalarF32 value) {
    Atom(ctx, inst, binding, offset, value, "ADD", "F32");
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "ADD" and "F16x2".
void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                               ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||||
|
[[maybe_unused]] const IR::Value& binding,
|
||||||
|
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "MIN" and "F16x2".
void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                               ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||||
|
[[maybe_unused]] const IR::Value& binding,
|
||||||
|
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forwards every argument to Atom, appending the strings "MAX" and "F16x2".
void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                               ScalarU32 offset, Register value) {
    Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
}
|
||||||
|
|
||||||
|
void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||||
|
[[maybe_unused]] const IR::Value& binding,
|
||||||
|
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicIAdd32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicSMin32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicUMin32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicSMax32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicUMax32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicInc32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicDec32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicAnd32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicOr32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicXor32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicExchange32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicIAdd64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicSMin64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicUMin64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicSMax64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicUMax64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicInc64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicDec64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicAnd64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicOr64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicXor64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicExchange64(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicAddF32(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicAddF16x2(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicAddF32x2(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicMinF16x2(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicMinF32x2(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicMaxF16x2(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGlobalAtomicMaxF32x2(EmitContext&) {
|
||||||
|
throw NotImplementedException("GLASM instruction");
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::GLASM
|
} // namespace Shader::Backend::GLASM
|
||||||
|
|
|
@ -111,7 +111,10 @@ struct RuntimeInfo {
|
||||||
std::optional<CompareFunction> alpha_test_func;
|
std::optional<CompareFunction> alpha_test_func;
|
||||||
float alpha_test_reference{};
|
float alpha_test_reference{};
|
||||||
|
|
||||||
|
// Static y negate value
|
||||||
bool y_negate{};
|
bool y_negate{};
|
||||||
|
// Use storage buffers instead of global pointers on GLASM
|
||||||
|
bool glasm_use_storage_buffers{};
|
||||||
|
|
||||||
std::vector<TransformFeedbackVarying> xfb_varyings;
|
std::vector<TransformFeedbackVarying> xfb_varyings;
|
||||||
};
|
};
|
||||||
|
|
|
@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
|
||||||
|
|
||||||
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
||||||
u32 offset, u32 size, bool is_written) {
|
u32 offset, u32 size, bool is_written) {
|
||||||
if (use_assembly_shaders) {
|
if (use_storage_buffers) {
|
||||||
|
const GLuint base_binding = graphics_base_storage_bindings[stage];
|
||||||
|
const GLuint binding = base_binding + binding_index;
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
|
||||||
|
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||||
|
} else {
|
||||||
const BindlessSSBO ssbo{
|
const BindlessSSBO ssbo{
|
||||||
.address = buffer.HostGpuAddr() + offset,
|
.address = buffer.HostGpuAddr() + offset,
|
||||||
.length = static_cast<GLsizei>(size),
|
.length = static_cast<GLsizei>(size),
|
||||||
|
@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
|
||||||
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
||||||
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
|
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
|
||||||
reinterpret_cast<const GLuint*>(&ssbo));
|
reinterpret_cast<const GLuint*>(&ssbo));
|
||||||
} else {
|
|
||||||
const GLuint base_binding = graphics_base_storage_bindings[stage];
|
|
||||||
const GLuint binding = base_binding + binding_index;
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
|
|
||||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
|
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
|
||||||
u32 size, bool is_written) {
|
u32 size, bool is_written) {
|
||||||
if (use_assembly_shaders) {
|
if (use_storage_buffers) {
|
||||||
|
if (size != 0) {
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
|
||||||
|
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||||
|
} else {
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
const BindlessSSBO ssbo{
|
const BindlessSSBO ssbo{
|
||||||
.address = buffer.HostGpuAddr() + offset,
|
.address = buffer.HostGpuAddr() + offset,
|
||||||
.length = static_cast<GLsizei>(size),
|
.length = static_cast<GLsizei>(size),
|
||||||
|
@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
|
||||||
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
||||||
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
|
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
|
||||||
reinterpret_cast<const GLuint*>(&ssbo));
|
reinterpret_cast<const GLuint*>(&ssbo));
|
||||||
} else if (size == 0) {
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
|
|
||||||
} else {
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
|
|
||||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -147,6 +147,10 @@ public:
|
||||||
image_handles = image_handles_;
|
image_handles = image_handles_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stores the flag that BindStorageBuffer and BindComputeStorageBuffer test to pick between
/// glBindBufferRange and the bindless-address path.
void SetEnableStorageBuffers(bool use_storage_buffers_) {
    use_storage_buffers = use_storage_buffers_;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static constexpr std::array PABO_LUT{
|
static constexpr std::array PABO_LUT{
|
||||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
|
@ -160,6 +164,8 @@ private:
|
||||||
bool use_assembly_shaders = false;
|
bool use_assembly_shaders = false;
|
||||||
bool has_unified_vertex_buffers = false;
|
bool has_unified_vertex_buffers = false;
|
||||||
|
|
||||||
|
bool use_storage_buffers = false;
|
||||||
|
|
||||||
u32 max_attributes = 0;
|
u32 max_attributes = 0;
|
||||||
|
|
||||||
std::array<GLuint, 5> graphics_base_uniform_bindings{};
|
std::array<GLuint, 5> graphics_base_uniform_bindings{};
|
||||||
|
|
|
@ -17,6 +17,15 @@ using VideoCommon::ImageId;
|
||||||
constexpr u32 MAX_TEXTURES = 64;
|
constexpr u32 MAX_TEXTURES = 64;
|
||||||
constexpr u32 MAX_IMAGES = 16;
|
constexpr u32 MAX_IMAGES = 16;
|
||||||
|
|
||||||
|
template <typename Range>
|
||||||
|
u32 AccumulateCount(const Range& range) {
|
||||||
|
u32 num{};
|
||||||
|
for (const auto& desc : range) {
|
||||||
|
num += desc.count;
|
||||||
|
}
|
||||||
|
return num;
|
||||||
|
}
|
||||||
|
|
||||||
size_t ComputePipelineKey::Hash() const noexcept {
|
size_t ComputePipelineKey::Hash() const noexcept {
|
||||||
return static_cast<size_t>(
|
return static_cast<size_t>(
|
||||||
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
|
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
|
||||||
|
@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
|
||||||
return std::memcmp(this, &rhs, sizeof *this) == 0;
|
return std::memcmp(this, &rhs, sizeof *this) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Builds a compute pipeline from the given programs and shader info.
///
/// Caches the texture-buffer and image-buffer descriptor totals, asserts that the total texture
/// and image counts fit in MAX_TEXTURES / MAX_IMAGES, and decides how storage buffers are bound:
/// - use_storage_buffers is true for non-GLASM programs, and for GLASM programs whose storage
///   buffer count fits within device.GetMaxGLASMStorageBufferBlocks().
/// - writes_global_memory is true only when storage buffers are NOT used and at least one
///   storage buffer descriptor is marked as written.
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
                                 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                                 Tegra::Engines::KeplerCompute& kepler_compute_,
                                 ProgramManager& program_manager_, const Shader::Info& info_,
                                 OGLProgram source_program_, OGLAssemblyProgram assembly_program_)
    : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
      kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_},
      source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} {
    num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
    num_image_buffers = AccumulateCount(info.image_buffer_descriptors);

    const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
    ASSERT(num_textures <= MAX_TEXTURES);

    const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
    ASSERT(num_images <= MAX_IMAGES);

    // A non-zero assembly program handle means this pipeline runs GLASM.
    const bool is_glasm{assembly_program.handle != 0};
    const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
    // Inclusive bound: a shader that uses exactly the maximum number of blocks still fits.
    // This matches the comparison GraphicsPipeline performs for the same decision.
    use_storage_buffers =
        !is_glasm || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
    writes_global_memory = !use_storage_buffers &&
                           std::ranges::any_of(info.storage_buffers_descriptors,
                                               [](const auto& desc) { return desc.is_written; });
}
|
||||||
|
|
||||||
void ComputePipeline::Configure() {
|
void ComputePipeline::Configure() {
|
||||||
|
@ -150,6 +159,7 @@ void ComputePipeline::Configure() {
|
||||||
|
|
||||||
buffer_cache.UpdateComputeBuffers();
|
buffer_cache.UpdateComputeBuffers();
|
||||||
|
|
||||||
|
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
|
||||||
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
|
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
|
||||||
buffer_cache.BindHostComputeBuffers();
|
buffer_cache.BindHostComputeBuffers();
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ struct Info;
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class Device;
|
||||||
class ProgramManager;
|
class ProgramManager;
|
||||||
|
|
||||||
struct ComputePipelineKey {
|
struct ComputePipelineKey {
|
||||||
|
@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
|
||||||
|
|
||||||
class ComputePipeline {
|
class ComputePipeline {
|
||||||
public:
|
public:
|
||||||
explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
|
||||||
Tegra::MemoryManager& gpu_memory_,
|
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
|
||||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||||
ProgramManager& program_manager_, const Shader::Info& info_,
|
ProgramManager& program_manager_, const Shader::Info& info_,
|
||||||
OGLProgram source_program_, OGLAssemblyProgram assembly_program_);
|
OGLProgram source_program_, OGLAssemblyProgram assembly_program_);
|
||||||
|
|
||||||
void Configure();
|
void Configure();
|
||||||
|
|
||||||
|
[[nodiscard]] bool WritesGlobalMemory() const noexcept {
|
||||||
|
return writes_global_memory;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TextureCache& texture_cache;
|
TextureCache& texture_cache;
|
||||||
BufferCache& buffer_cache;
|
BufferCache& buffer_cache;
|
||||||
|
@ -70,6 +75,9 @@ private:
|
||||||
|
|
||||||
u32 num_texture_buffers{};
|
u32 num_texture_buffers{};
|
||||||
u32 num_image_buffers{};
|
u32 num_image_buffers{};
|
||||||
|
|
||||||
|
bool use_storage_buffers{};
|
||||||
|
bool writes_global_memory{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -135,13 +135,13 @@ Device::Device() {
|
||||||
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
||||||
disable_fast_buffer_sub_data = true;
|
disable_fast_buffer_sub_data = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
max_uniform_buffers = BuildMaxUniformBuffers();
|
max_uniform_buffers = BuildMaxUniformBuffers();
|
||||||
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||||
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||||
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
|
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
|
||||||
|
max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
|
||||||
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
|
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
|
||||||
GLAD_GL_NV_shader_thread_shuffle;
|
GLAD_GL_NV_shader_thread_shuffle;
|
||||||
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
|
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
|
||||||
|
@ -236,22 +236,6 @@ std::string Device::GetVendorName() const {
|
||||||
return vendor_name;
|
return vendor_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
Device::Device(std::nullptr_t) {
|
|
||||||
max_uniform_buffers.fill(std::numeric_limits<u32>::max());
|
|
||||||
uniform_buffer_alignment = 4;
|
|
||||||
shader_storage_alignment = 4;
|
|
||||||
max_vertex_attributes = 16;
|
|
||||||
max_varyings = 15;
|
|
||||||
max_compute_shared_memory_size = 0x10000;
|
|
||||||
has_warp_intrinsics = true;
|
|
||||||
has_shader_ballot = true;
|
|
||||||
has_vertex_viewport_layer = true;
|
|
||||||
has_image_load_formatted = true;
|
|
||||||
has_texture_shadow_lod = true;
|
|
||||||
has_variable_aoffi = true;
|
|
||||||
has_depth_buffer_float = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Device::TestVariableAoffi() {
|
bool Device::TestVariableAoffi() {
|
||||||
return TestProgram(R"(#version 430 core
|
return TestProgram(R"(#version 430 core
|
||||||
// This is a unit test, please ignore me on apitrace bug reports.
|
// This is a unit test, please ignore me on apitrace bug reports.
|
||||||
|
|
|
@ -13,7 +13,6 @@ namespace OpenGL {
|
||||||
class Device {
|
class Device {
|
||||||
public:
|
public:
|
||||||
explicit Device();
|
explicit Device();
|
||||||
explicit Device(std::nullptr_t);
|
|
||||||
|
|
||||||
[[nodiscard]] std::string GetVendorName() const;
|
[[nodiscard]] std::string GetVendorName() const;
|
||||||
|
|
||||||
|
@ -41,6 +40,10 @@ public:
|
||||||
return max_compute_shared_memory_size;
|
return max_compute_shared_memory_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the storage-buffer block limit cached in max_glasm_storage_buffer_blocks.
u32 GetMaxGLASMStorageBufferBlocks() const {
    return max_glasm_storage_buffer_blocks;
}
|
||||||
|
|
||||||
bool HasWarpIntrinsics() const {
|
bool HasWarpIntrinsics() const {
|
||||||
return has_warp_intrinsics;
|
return has_warp_intrinsics;
|
||||||
}
|
}
|
||||||
|
@ -124,6 +127,7 @@ private:
|
||||||
u32 max_vertex_attributes{};
|
u32 max_vertex_attributes{};
|
||||||
u32 max_varyings{};
|
u32 max_varyings{};
|
||||||
u32 max_compute_shared_memory_size{};
|
u32 max_compute_shared_memory_size{};
|
||||||
|
u32 max_glasm_storage_buffer_blocks{};
|
||||||
bool has_warp_intrinsics{};
|
bool has_warp_intrinsics{};
|
||||||
bool has_shader_ballot{};
|
bool has_shader_ballot{};
|
||||||
bool has_vertex_viewport_layer{};
|
bool has_vertex_viewport_layer{};
|
||||||
|
|
|
@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64;
|
||||||
constexpr u32 MAX_IMAGES = 8;
|
constexpr u32 MAX_IMAGES = 8;
|
||||||
|
|
||||||
template <typename Range>
|
template <typename Range>
|
||||||
u32 AccumulateCount(Range&& range) {
|
u32 AccumulateCount(const Range& range) {
|
||||||
u32 num{};
|
u32 num{};
|
||||||
for (const auto& desc : range) {
|
for (const auto& desc : range) {
|
||||||
num += desc.count;
|
num += desc.count;
|
||||||
|
@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc
|
||||||
return std::memcmp(this, &rhs, Size()) == 0;
|
return std::memcmp(this, &rhs, Size()) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
|
||||||
Tegra::MemoryManager& gpu_memory_,
|
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
|
||||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||||
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
||||||
OGLProgram program_,
|
OGLProgram program_,
|
||||||
|
@ -90,6 +90,7 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu
|
||||||
}
|
}
|
||||||
u32 num_textures{};
|
u32 num_textures{};
|
||||||
u32 num_images{};
|
u32 num_images{};
|
||||||
|
u32 num_storage_buffers{};
|
||||||
for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
|
for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
|
||||||
const auto& info{stage_infos[stage]};
|
const auto& info{stage_infos[stage]};
|
||||||
if (stage < 4) {
|
if (stage < 4) {
|
||||||
|
@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu
|
||||||
|
|
||||||
num_textures += AccumulateCount(info.texture_descriptors);
|
num_textures += AccumulateCount(info.texture_descriptors);
|
||||||
num_images += AccumulateCount(info.image_descriptors);
|
num_images += AccumulateCount(info.image_descriptors);
|
||||||
|
num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
|
||||||
|
|
||||||
|
writes_global_memory |= std::ranges::any_of(
|
||||||
|
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
|
||||||
}
|
}
|
||||||
ASSERT(num_textures <= MAX_TEXTURES);
|
ASSERT(num_textures <= MAX_TEXTURES);
|
||||||
ASSERT(num_images <= MAX_IMAGES);
|
ASSERT(num_images <= MAX_IMAGES);
|
||||||
|
|
||||||
if (assembly_programs[0].handle != 0 && xfb_state) {
|
const bool assembly_shaders{assembly_programs[0].handle != 0};
|
||||||
|
use_storage_buffers =
|
||||||
|
!assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
|
||||||
|
writes_global_memory &= !use_storage_buffers;
|
||||||
|
|
||||||
|
if (assembly_shaders && xfb_state) {
|
||||||
GenerateTransformFeedbackState(*xfb_state);
|
GenerateTransformFeedbackState(*xfb_state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) {
|
||||||
|
|
||||||
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
|
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
|
||||||
buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
|
buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
|
||||||
|
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
|
||||||
|
|
||||||
const auto& regs{maxwell3d.regs};
|
const auto& regs{maxwell3d.regs};
|
||||||
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
|
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class Device;
|
||||||
class ProgramManager;
|
class ProgramManager;
|
||||||
|
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
@ -60,8 +61,8 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
|
||||||
|
|
||||||
class GraphicsPipeline {
|
class GraphicsPipeline {
|
||||||
public:
|
public:
|
||||||
explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
|
||||||
Tegra::MemoryManager& gpu_memory_,
|
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
|
||||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||||
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
||||||
OGLProgram program_,
|
OGLProgram program_,
|
||||||
|
@ -77,6 +78,10 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool WritesGlobalMemory() const noexcept {
|
||||||
|
return writes_global_memory;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state);
|
void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state);
|
||||||
|
|
||||||
|
@ -99,6 +104,9 @@ private:
|
||||||
std::array<u32, 5> num_texture_buffers{};
|
std::array<u32, 5> num_texture_buffers{};
|
||||||
std::array<u32, 5> num_image_buffers{};
|
std::array<u32, 5> num_image_buffers{};
|
||||||
|
|
||||||
|
bool use_storage_buffers{};
|
||||||
|
bool writes_global_memory{};
|
||||||
|
|
||||||
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
|
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
|
||||||
GLsizei num_xfb_attribs{};
|
GLsizei num_xfb_attribs{};
|
||||||
GLsizei num_xfb_strides{};
|
GLsizei num_xfb_strides{};
|
||||||
|
|
|
@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
EndTransformFeedback();
|
EndTransformFeedback();
|
||||||
|
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
|
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
||||||
|
|
||||||
gpu.TickWork();
|
gpu.TickWork();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::DispatchCompute() {
|
void RasterizerOpenGL::DispatchCompute() {
|
||||||
ComputePipeline* const program{shader_cache.CurrentComputePipeline()};
|
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
|
||||||
if (!program) {
|
if (!pipeline) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
program->Configure();
|
pipeline->Configure();
|
||||||
const auto& qmd{kepler_compute.launch_description};
|
const auto& qmd{kepler_compute.launch_description};
|
||||||
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
|
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
|
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
|
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
|
||||||
|
@ -449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() {
|
||||||
|
|
||||||
// Make sure memory stored from the previous GL command stream is visible
|
// Make sure memory stored from the previous GL command stream is visible
|
||||||
// This is only needed on assembly shaders where we write to GPU memory with raw pointers
|
// This is only needed on assembly shaders where we write to GPU memory with raw pointers
|
||||||
// TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used
|
if (has_written_global_memory) {
|
||||||
// and prefer using NV_shader_storage_buffer_object when possible
|
has_written_global_memory = false;
|
||||||
if (Settings::values.use_assembly_shaders.GetValue()) {
|
|
||||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||||
}
|
}
|
||||||
glFlush();
|
glFlush();
|
||||||
|
|
|
@ -225,7 +225,8 @@ private:
|
||||||
std::array<GLuint, MAX_IMAGES> image_handles{};
|
std::array<GLuint, MAX_IMAGES> image_handles{};
|
||||||
|
|
||||||
/// Number of commands queued to the OpenGL driver. Resetted on flush.
|
/// Number of commands queued to the OpenGL driver. Resetted on flush.
|
||||||
std::size_t num_queued_commands = 0;
|
size_t num_queued_commands = 0;
|
||||||
|
bool has_written_global_memory = false;
|
||||||
|
|
||||||
u32 last_clip_distance_mask = 0;
|
u32 last_clip_distance_mask = 0;
|
||||||
};
|
};
|
||||||
|
|
|
@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
|
Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
|
||||||
const Shader::IR::Program& program) {
|
const Shader::IR::Program& program,
|
||||||
|
bool glasm_use_storage_buffers) {
|
||||||
Shader::RuntimeInfo info;
|
Shader::RuntimeInfo info;
|
||||||
switch (program.stage) {
|
switch (program.stage) {
|
||||||
case Shader::Stage::TessellationEval:
|
case Shader::Stage::TessellationEval:
|
||||||
|
@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
|
||||||
info.input_topology = Shader::InputTopology::TrianglesAdjacency;
|
info.input_topology = Shader::InputTopology::TrianglesAdjacency;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
info.glasm_use_storage_buffers = glasm_use_storage_buffers;
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -435,7 +437,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
||||||
ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs,
|
ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs,
|
||||||
bool build_in_parallel) {
|
bool build_in_parallel) {
|
||||||
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
|
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
|
||||||
size_t env_index{0};
|
size_t env_index{};
|
||||||
|
u32 total_storage_buffers{};
|
||||||
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
|
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
|
||||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||||
if (key.unique_hashes[index] == 0) {
|
if (key.unique_hashes[index] == 0) {
|
||||||
|
@ -447,7 +450,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
||||||
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
|
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
|
||||||
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
|
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
|
||||||
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
|
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
|
||||||
|
|
||||||
|
for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
|
||||||
|
total_storage_buffers += desc.count;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
|
||||||
|
const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
|
||||||
|
|
||||||
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
|
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
|
||||||
|
|
||||||
OGLProgram source_program;
|
OGLProgram source_program;
|
||||||
|
@ -466,7 +476,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
||||||
const size_t stage_index{index - 1};
|
const size_t stage_index{index - 1};
|
||||||
infos[stage_index] = &program.info;
|
infos[stage_index] = &program.info;
|
||||||
|
|
||||||
const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)};
|
const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)};
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
const std::string code{EmitGLASM(profile, runtime_info, program, binding)};
|
const std::string code{EmitGLASM(profile, runtime_info, program, binding)};
|
||||||
assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index));
|
assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index));
|
||||||
|
@ -479,7 +489,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
||||||
LinkProgram(source_program.handle);
|
LinkProgram(source_program.handle);
|
||||||
}
|
}
|
||||||
return std::make_unique<GraphicsPipeline>(
|
return std::make_unique<GraphicsPipeline>(
|
||||||
texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
|
device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
|
||||||
std::move(source_program), std::move(assembly_programs), infos,
|
std::move(source_program), std::move(assembly_programs), infos,
|
||||||
key.xfb_enabled != 0 ? &key.xfb_state : nullptr);
|
key.xfb_enabled != 0 ? &key.xfb_state : nullptr);
|
||||||
}
|
}
|
||||||
|
@ -508,10 +518,18 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
|
||||||
|
|
||||||
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
||||||
Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
|
Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
|
||||||
|
|
||||||
|
u32 num_storage_buffers{};
|
||||||
|
for (const auto& desc : program.info.storage_buffers_descriptors) {
|
||||||
|
num_storage_buffers += desc.count;
|
||||||
|
}
|
||||||
|
Shader::RuntimeInfo info;
|
||||||
|
info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
|
||||||
|
|
||||||
OGLAssemblyProgram asm_program;
|
OGLAssemblyProgram asm_program;
|
||||||
OGLProgram source_program;
|
OGLProgram source_program;
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
const std::string code{EmitGLASM(profile, program)};
|
const std::string code{EmitGLASM(profile, info, program)};
|
||||||
asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
|
asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
|
||||||
} else {
|
} else {
|
||||||
const std::vector<u32> code{EmitSPIRV(profile, program)};
|
const std::vector<u32> code{EmitSPIRV(profile, program)};
|
||||||
|
@ -519,7 +537,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
|
||||||
AddShader(GL_COMPUTE_SHADER, source_program.handle, code);
|
AddShader(GL_COMPUTE_SHADER, source_program.handle, code);
|
||||||
LinkProgram(source_program.handle);
|
LinkProgram(source_program.handle);
|
||||||
}
|
}
|
||||||
return std::make_unique<ComputePipeline>(texture_cache, buffer_cache, gpu_memory,
|
return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
|
||||||
kepler_compute, program_manager, program.info,
|
kepler_compute, program_manager, program.info,
|
||||||
std::move(source_program), std::move(asm_program));
|
std::move(source_program), std::move(asm_program));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue