Merge pull request #7629 from ameerj/nv-driver-fixes
shaders: Add fixes for NVIDIA drivers 495+
This commit is contained in:
commit
ae7da0b12d
18 changed files with 140 additions and 30 deletions
|
@ -126,6 +126,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a GLASM `MOV.S` that copies an integer built-in attribute into the `.x` component of
/// the operand formatted for `inst`.
///
/// Handles PrimitiveId, InstanceId and VertexId only. The trailing ScalarU32 argument is unnamed
/// and not used.
///
/// @throws NotImplementedException for every other attribute.
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32) {
    if (attr == IR::Attribute::PrimitiveId) {
        ctx.Add("MOV.S {}.x,primitive.id;", inst);
        return;
    }
    if (attr == IR::Attribute::InstanceId) {
        // The instance is read through the context's attribute name prefix.
        ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
        return;
    }
    if (attr == IR::Attribute::VertexId) {
        // Same prefix as above, with the `.id` binding.
        ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
        return;
    }
    throw NotImplementedException("Get U32 attribute {}", attr);
}
|
||||||
|
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
|
||||||
[[maybe_unused]] ScalarU32 vertex) {
|
[[maybe_unused]] ScalarU32 vertex) {
|
||||||
const u32 element{static_cast<u32>(attr) % 4};
|
const u32 element{static_cast<u32>(attr) % 4};
|
||||||
|
|
|
@ -50,6 +50,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
|
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
|
||||||
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
|
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
|
||||||
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
|
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
|
||||||
|
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
|
||||||
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
|
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
|
||||||
void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
|
void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
|
#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
|
||||||
#include "shader_recompiler/backend/glsl/glsl_emit_context.h"
|
#include "shader_recompiler/backend/glsl/glsl_emit_context.h"
|
||||||
#include "shader_recompiler/frontend/ir/value.h"
|
#include "shader_recompiler/frontend/ir/value.h"
|
||||||
|
#include "shader_recompiler/profile.h"
|
||||||
|
|
||||||
namespace Shader::Backend::GLSL {
|
namespace Shader::Backend::GLSL {
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -30,8 +31,9 @@ void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value)
|
||||||
inst.DestructiveAddUsage(1);
|
inst.DestructiveAddUsage(1);
|
||||||
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
|
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
|
||||||
const auto input{ctx.var_alloc.Consume(value)};
|
const auto input{ctx.var_alloc.Consume(value)};
|
||||||
|
const auto suffix{ctx.profile.has_gl_bool_ref_bug ? "?true:false" : ""};
|
||||||
if (ret != input) {
|
if (ret != input) {
|
||||||
ctx.Add("{}={};", ret, input);
|
ctx.Add("{}={}{};", ret, input, suffix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -102,39 +102,46 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const
|
||||||
|
|
||||||
void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
GetCbuf8(ctx, inst, binding, offset, "ftou");
|
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||||
|
GetCbuf8(ctx, inst, binding, offset, cast);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
GetCbuf8(ctx, inst, binding, offset, "ftoi");
|
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
|
||||||
|
GetCbuf8(ctx, inst, binding, offset, cast);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
GetCbuf16(ctx, inst, binding, offset, "ftou");
|
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||||
|
GetCbuf16(ctx, inst, binding, offset, cast);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
GetCbuf16(ctx, inst, binding, offset, "ftoi");
|
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
|
||||||
|
GetCbuf16(ctx, inst, binding, offset, cast);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
|
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
|
||||||
GetCbuf(ctx, ret, binding, offset, 32, "ftou");
|
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||||
|
GetCbuf(ctx, ret, binding, offset, 32, cast);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
|
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
|
||||||
GetCbuf(ctx, ret, binding, offset, 32);
|
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""};
|
||||||
|
GetCbuf(ctx, ret, binding, offset, 32, cast);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
|
const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
|
||||||
|
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||||
if (offset.IsImmediate()) {
|
if (offset.IsImmediate()) {
|
||||||
static constexpr u32 cbuf_size{0x10000};
|
static constexpr u32 cbuf_size{0x10000};
|
||||||
const u32 u32_offset{offset.U32()};
|
const u32 u32_offset{offset.U32()};
|
||||||
|
@ -145,26 +152,26 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (u32_offset % 2 == 0) {
|
if (u32_offset % 2 == 0) {
|
||||||
ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
|
ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16,
|
||||||
OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
|
OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
|
||||||
} else {
|
} else {
|
||||||
ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
|
ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
|
||||||
OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
|
u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf,
|
||||||
OffsetSwizzle(u32_offset + 4));
|
(u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4));
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto offset_var{ctx.var_alloc.Consume(offset)};
|
const auto offset_var{ctx.var_alloc.Consume(offset)};
|
||||||
if (!ctx.profile.has_gl_component_indexing_bug) {
|
if (!ctx.profile.has_gl_component_indexing_bug) {
|
||||||
ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
|
ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
|
||||||
inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
|
cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
|
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
|
||||||
const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
|
const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
|
||||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||||
ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
|
ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
|
||||||
swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
|
swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
|
||||||
"xyzw"[(swizzle + 1) % 4]);
|
"xyzw"[(swizzle + 1) % 4]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -221,6 +228,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a GLSL statement, through ctx.AddU32, that assigns an integer built-in converted with
/// `uint(...)` to the result of `inst`.
///
/// Handles PrimitiveId (gl_PrimitiveID), InstanceId (gl_InstanceID) and VertexId (gl_VertexID).
/// The trailing std::string_view argument is unnamed and not used.
///
/// @throws NotImplementedException for every other attribute.
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view) {
    switch (attr) {
    case IR::Attribute::PrimitiveId:
        ctx.AddU32("{}=uint(gl_PrimitiveID);", inst);
        return;
    case IR::Attribute::InstanceId:
        ctx.AddU32("{}=uint(gl_InstanceID);", inst);
        return;
    case IR::Attribute::VertexId:
        ctx.AddU32("{}=uint(gl_VertexID);", inst);
        return;
    default:
        break;
    }
    // Only reached for attributes without an integer read path.
    throw NotImplementedException("Get U32 attribute {}", attr);
}
|
||||||
|
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
|
||||||
[[maybe_unused]] std::string_view vertex) {
|
[[maybe_unused]] std::string_view vertex) {
|
||||||
if (IR::IsGeneric(attr)) {
|
if (IR::IsGeneric(attr)) {
|
||||||
|
|
|
@ -125,11 +125,11 @@ void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
ctx.AddF32("{}=-({});", inst, value);
|
ctx.AddF32("{}=0.f-({});", inst, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
ctx.AddF64("{}=-({});", inst, value);
|
ctx.AddF64("{}=double(0.)-({});", inst, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
|
|
|
@ -60,6 +60,8 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
|
||||||
const IR::Value& offset);
|
const IR::Value& offset);
|
||||||
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
||||||
std::string_view vertex);
|
std::string_view vertex);
|
||||||
|
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
||||||
|
std::string_view vertex);
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
|
||||||
std::string_view vertex);
|
std::string_view vertex);
|
||||||
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
|
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
|
||||||
|
|
|
@ -87,11 +87,11 @@ void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
ctx.AddU32("{}=uint(-({}));", inst, value);
|
ctx.AddU32("{}=uint(int(0)-int({}));", inst, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
ctx.AddU64("{}=-({});", inst, value);
|
ctx.AddU64("{}=uint64_t(int64_t(0)-int64_t({}));", inst, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
|
|
|
@ -90,7 +90,9 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
|
||||||
if (phi_reg == val_reg) {
|
if (phi_reg == val_reg) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ctx.Add("{}={};", phi_reg, val_reg);
|
const bool needs_workaround{ctx.profile.has_gl_bool_ref_bug && phi_type == IR::Type::U1};
|
||||||
|
const auto suffix{needs_workaround ? "?true:false" : ""};
|
||||||
|
ctx.Add("{}={}{};", phi_reg, val_reg, suffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitPrologue(EmitContext& ctx) {
|
void EmitPrologue(EmitContext& ctx) {
|
||||||
|
|
|
@ -428,9 +428,10 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (const auto& desc : info.constant_buffer_descriptors) {
|
for (const auto& desc : info.constant_buffer_descriptors) {
|
||||||
header += fmt::format(
|
const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"};
|
||||||
"layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
|
header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};",
|
||||||
bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
|
bindings.uniform_buffer, stage_name, desc.index, cbuf_type,
|
||||||
|
stage_name, desc.index, 4 * 1024);
|
||||||
bindings.uniform_buffer += desc.count;
|
bindings.uniform_buffer += desc.count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -355,6 +355,31 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits SPIR-V that reads an integer built-in attribute and returns the resulting Id, typed as
/// ctx.U32[1].
///
/// PrimitiveId is loaded directly from ctx.primitive_id. For InstanceId and VertexId the path
/// depends on ctx.profile.support_vertex_instance_id: when set, the value is loaded from
/// ctx.instance_id / ctx.vertex_id; otherwise it is computed as the loaded index minus the loaded
/// base (instance_index - base_instance, vertex_index - base_vertex).
/// The trailing Id argument is unnamed and not used.
///
/// @throws NotImplementedException for every other attribute.
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
    // Loads the index first and the base second, then subtracts; the emission order matches the
    // two explicit branches this helper replaces.
    const auto index_minus_base = [&ctx](Id index_var, Id base_var) {
        const Id index{ctx.OpLoad(ctx.U32[1], index_var)};
        const Id base{ctx.OpLoad(ctx.U32[1], base_var)};
        return ctx.OpISub(ctx.U32[1], index, base);
    };
    const bool has_direct_ids{ctx.profile.support_vertex_instance_id};

    switch (attr) {
    case IR::Attribute::PrimitiveId:
        return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
    case IR::Attribute::InstanceId:
        if (!has_direct_ids) {
            return index_minus_base(ctx.instance_index, ctx.base_instance);
        }
        return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
    case IR::Attribute::VertexId:
        if (!has_direct_ids) {
            return index_minus_base(ctx.vertex_index, ctx.base_vertex);
        }
        return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
    default:
        throw NotImplementedException("Read U32 attribute {}", attr);
    }
}
|
||||||
|
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
|
||||||
const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
|
const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
|
||||||
if (!output) {
|
if (!output) {
|
||||||
|
|
|
@ -53,6 +53,7 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
|
||||||
Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
||||||
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
|
||||||
|
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
|
||||||
Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
|
Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
|
||||||
void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
|
void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
|
||||||
|
|
|
@ -40,6 +40,7 @@ OPCODE(GetCbufU32, U32, U32,
|
||||||
OPCODE(GetCbufF32, F32, U32, U32, )
|
OPCODE(GetCbufF32, F32, U32, U32, )
|
||||||
OPCODE(GetCbufU32x2, U32x2, U32, U32, )
|
OPCODE(GetCbufU32x2, U32x2, U32, U32, )
|
||||||
OPCODE(GetAttribute, F32, Attribute, U32, )
|
OPCODE(GetAttribute, F32, Attribute, U32, )
|
||||||
|
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
||||||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||||
OPCODE(GetAttributeIndexed, F32, U32, U32, )
|
OPCODE(GetAttributeIndexed, F32, U32, U32, )
|
||||||
OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
|
OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
|
||||||
|
|
|
@ -389,6 +389,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
info.uses_demote_to_helper_invocation = true;
|
info.uses_demote_to_helper_invocation = true;
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::GetAttribute:
|
case IR::Opcode::GetAttribute:
|
||||||
|
case IR::Opcode::GetAttributeU32:
|
||||||
info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
|
info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::SetAttribute:
|
case IR::Opcode::SetAttribute:
|
||||||
|
|
|
@ -505,6 +505,29 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if constexpr (op == IR::Opcode::BitCastU32F32) {
|
||||||
|
// Workaround for new NVIDIA driver bug, where:
|
||||||
|
// uint attr = ftou(itof(gl_InstanceID));
|
||||||
|
// always returned 0.
|
||||||
|
// We can instead manually optimize this and work around the driver bug:
|
||||||
|
// uint attr = uint(gl_InstanceID);
|
||||||
|
if (arg_inst->GetOpcode() == IR::Opcode::GetAttribute) {
|
||||||
|
const IR::Attribute attr{arg_inst->Arg(0).Attribute()};
|
||||||
|
switch (attr) {
|
||||||
|
case IR::Attribute::PrimitiveId:
|
||||||
|
case IR::Attribute::InstanceId:
|
||||||
|
case IR::Attribute::VertexId:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Replace the bitcasts with an integer attribute get
|
||||||
|
inst.ReplaceOpcode(IR::Opcode::GetAttributeU32);
|
||||||
|
inst.SetArg(0, arg_inst->Arg(0));
|
||||||
|
inst.SetArg(1, arg_inst->Arg(1));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
|
void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
|
||||||
|
|
|
@ -65,6 +65,10 @@ struct Profile {
|
||||||
bool has_gl_component_indexing_bug{};
|
bool has_gl_component_indexing_bug{};
|
||||||
/// The precise type qualifier is broken in the fragment stage of some drivers
|
/// The precise type qualifier is broken in the fragment stage of some drivers
|
||||||
bool has_gl_precise_bug{};
|
bool has_gl_precise_bug{};
|
||||||
|
/// Some drivers do not properly support floatBitsToUint when used on cbufs
|
||||||
|
bool has_gl_cbuf_ftou_bug{};
|
||||||
|
/// Some drivers poorly optimize boolean variable references
|
||||||
|
bool has_gl_bool_ref_bug{};
|
||||||
/// Ignores SPIR-V ordered vs unordered using GLSL semantics
|
/// Ignores SPIR-V ordered vs unordered using GLSL semantics
|
||||||
bool ignore_nan_fp_comparisons{};
|
bool ignore_nan_fp_comparisons{};
|
||||||
|
|
||||||
|
|
|
@ -182,17 +182,13 @@ Device::Device() {
|
||||||
shader_backend = Settings::ShaderBackend::GLSL;
|
shader_backend = Settings::ShaderBackend::GLSL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia &&
|
if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
|
||||||
!Settings::values.renderer_debug) {
|
|
||||||
const std::string_view driver_version = version.substr(13);
|
const std::string_view driver_version = version.substr(13);
|
||||||
const int version_major =
|
const int version_major =
|
||||||
std::atoi(driver_version.substr(0, driver_version.find(".")).data());
|
std::atoi(driver_version.substr(0, driver_version.find(".")).data());
|
||||||
|
|
||||||
if (version_major >= 495) {
|
if (version_major >= 495) {
|
||||||
LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems "
|
has_cbuf_ftou_bug = true;
|
||||||
"with yuzu. Forcing GLASM as a mitigation.");
|
has_bool_ref_bug = true;
|
||||||
shader_backend = Settings::ShaderBackend::GLASM;
|
|
||||||
use_assembly_shaders = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -152,6 +152,14 @@ public:
|
||||||
return need_fastmath_off;
|
return need_fastmath_off;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the has_cbuf_ftou_bug flag, i.e. whether this device was marked as needing the
/// constant-buffer floatBitsToUint workaround.
bool HasCbufFtouBug() const {
|
||||||
|
return has_cbuf_ftou_bug;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the has_bool_ref_bug flag, i.e. whether this device was marked as needing the
/// boolean variable reference workaround.
bool HasBoolRefBug() const {
|
||||||
|
return has_bool_ref_bug;
|
||||||
|
}
|
||||||
|
|
||||||
Settings::ShaderBackend GetShaderBackend() const {
|
Settings::ShaderBackend GetShaderBackend() const {
|
||||||
return shader_backend;
|
return shader_backend;
|
||||||
}
|
}
|
||||||
|
@ -200,6 +208,8 @@ private:
|
||||||
bool has_sparse_texture_2{};
|
bool has_sparse_texture_2{};
|
||||||
bool warp_size_potentially_larger_than_guest{};
|
bool warp_size_potentially_larger_than_guest{};
|
||||||
bool need_fastmath_off{};
|
bool need_fastmath_off{};
|
||||||
|
bool has_cbuf_ftou_bug{};
|
||||||
|
bool has_bool_ref_bug{};
|
||||||
|
|
||||||
std::string vendor_name;
|
std::string vendor_name;
|
||||||
};
|
};
|
||||||
|
|
|
@ -214,6 +214,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
|
||||||
.has_broken_fp16_float_controls = false,
|
.has_broken_fp16_float_controls = false,
|
||||||
.has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
|
.has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
|
||||||
.has_gl_precise_bug = device.HasPreciseBug(),
|
.has_gl_precise_bug = device.HasPreciseBug(),
|
||||||
|
.has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
|
||||||
|
.has_gl_bool_ref_bug = device.HasBoolRefBug(),
|
||||||
.ignore_nan_fp_comparisons = true,
|
.ignore_nan_fp_comparisons = true,
|
||||||
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
|
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in a new issue