shader_recompiler,video_core: Cleanup some GCC and Clang errors
Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler.

shader_recompiler: cmake: Force bracket depth to 1024 on Clang. Increases the maximum fold expression depth.

thread_worker: Include condition_variable.

Don't use list initializers in control flow.

Co-authored-by: ReinUsesLisp <reinuseslisp@airmail.cc>
This commit is contained in:
parent
5cd3d00167
commit
0bb85f6a75
66 changed files with 308 additions and 313 deletions
|
@ -5,6 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
#include <condition_variable>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <stop_token>
|
#include <stop_token>
|
||||||
|
|
|
@ -196,6 +196,8 @@ else()
|
||||||
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
|
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
|
||||||
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
|
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
|
||||||
-Werror=unused-variable
|
-Werror=unused-variable
|
||||||
|
|
||||||
|
$<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024>
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <climits>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
@ -116,7 +117,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
|
||||||
const std::string_view def_name_view(
|
const std::string_view def_name_view(
|
||||||
def_name.data(),
|
def_name.data(),
|
||||||
fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
|
fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
|
||||||
defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
|
defs[static_cast<size_t>(i)] =
|
||||||
|
sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
namespace {
|
namespace {
|
||||||
template <class Func>
|
template <class Func>
|
||||||
struct FuncTraits : FuncTraits<Func> {};
|
struct FuncTraits {};
|
||||||
|
|
||||||
template <class ReturnType_, class... Args>
|
template <class ReturnType_, class... Args>
|
||||||
struct FuncTraits<ReturnType_ (*)(Args...)> {
|
struct FuncTraits<ReturnType_ (*)(Args...)> {
|
||||||
|
@ -64,17 +64,20 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
|
||||||
template <auto func, bool is_first_arg_inst, size_t... I>
|
template <auto func, bool is_first_arg_inst, size_t... I>
|
||||||
void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
|
void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
|
||||||
using Traits = FuncTraits<decltype(func)>;
|
using Traits = FuncTraits<decltype(func)>;
|
||||||
if constexpr (std::is_same_v<Traits::ReturnType, Id>) {
|
if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
|
||||||
if constexpr (is_first_arg_inst) {
|
if constexpr (is_first_arg_inst) {
|
||||||
SetDefinition<func>(ctx, inst, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
|
SetDefinition<func>(
|
||||||
|
ctx, inst, inst,
|
||||||
|
Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
|
||||||
} else {
|
} else {
|
||||||
SetDefinition<func>(ctx, inst, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
|
SetDefinition<func>(
|
||||||
|
ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if constexpr (is_first_arg_inst) {
|
if constexpr (is_first_arg_inst) {
|
||||||
func(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
|
func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
|
||||||
} else {
|
} else {
|
||||||
func(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
|
func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -94,14 +97,14 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitInst(EmitContext& ctx, IR::Inst* inst) {
|
void EmitInst(EmitContext& ctx, IR::Inst* inst) {
|
||||||
switch (inst->Opcode()) {
|
switch (inst->GetOpcode()) {
|
||||||
#define OPCODE(name, result_type, ...) \
|
#define OPCODE(name, result_type, ...) \
|
||||||
case IR::Opcode::name: \
|
case IR::Opcode::name: \
|
||||||
return Invoke<&Emit##name>(ctx, inst);
|
return Invoke<&Emit##name>(ctx, inst);
|
||||||
#include "shader_recompiler/frontend/ir/opcodes.inc"
|
#include "shader_recompiler/frontend/ir/opcodes.inc"
|
||||||
#undef OPCODE
|
#undef OPCODE
|
||||||
}
|
}
|
||||||
throw LogicError("Invalid opcode {}", inst->Opcode());
|
throw LogicError("Invalid opcode {}", inst->GetOpcode());
|
||||||
}
|
}
|
||||||
|
|
||||||
Id TypeId(const EmitContext& ctx, IR::Type type) {
|
Id TypeId(const EmitContext& ctx, IR::Type type) {
|
||||||
|
|
|
@ -43,11 +43,13 @@ public:
|
||||||
// LOG_WARNING("Not all arguments in PTP are immediate, STUBBING");
|
// LOG_WARNING("Not all arguments in PTP are immediate, STUBBING");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const IR::Opcode opcode{values[0]->Opcode()};
|
const IR::Opcode opcode{values[0]->GetOpcode()};
|
||||||
if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
|
if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
|
||||||
throw LogicError("Invalid PTP arguments");
|
throw LogicError("Invalid PTP arguments");
|
||||||
}
|
}
|
||||||
auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }};
|
auto read{[&](unsigned int a, unsigned int b) {
|
||||||
|
return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32());
|
||||||
|
}};
|
||||||
|
|
||||||
const Id offsets{
|
const Id offsets{
|
||||||
ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)),
|
ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)),
|
||||||
|
@ -297,13 +299,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
|
||||||
|
|
||||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
const IR::Value& offset, const IR::Value& offset2, Id dref) {
|
const IR::Value& offset, const IR::Value& offset2, Id dref) {
|
||||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
|
||||||
const ImageOperands operands(ctx, offset, offset2);
|
const ImageOperands operands(ctx, offset, offset2);
|
||||||
return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
|
return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
|
||||||
ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
|
ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
#pragma optimize("", off)
|
#pragma optimize("", off)
|
||||||
|
#endif
|
||||||
|
|
||||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
||||||
Id lod, Id ms) {
|
Id lod, Id ms) {
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
namespace {
|
namespace {
|
||||||
Id WarpExtract(EmitContext& ctx, Id value) {
|
Id WarpExtract(EmitContext& ctx, Id value) {
|
||||||
const Id shift{ctx.Constant(ctx.U32[1], 5)};
|
[[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)};
|
||||||
const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
|
const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
|
||||||
return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
|
return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
class FileEnvironment final : public Environment {
|
class FileEnvironment : public Environment {
|
||||||
public:
|
public:
|
||||||
explicit FileEnvironment(const char* path);
|
explicit FileEnvironment(const char* path);
|
||||||
~FileEnvironment() override;
|
~FileEnvironment() override;
|
||||||
|
|
|
@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) {
|
||||||
if (!IsGeneric(attribute)) {
|
if (!IsGeneric(attribute)) {
|
||||||
throw InvalidArgument("Attribute is not generic {}", attribute);
|
throw InvalidArgument("Attribute is not generic {}", attribute);
|
||||||
}
|
}
|
||||||
return (static_cast<int>(attribute) - static_cast<int>(Attribute::Generic0X)) / 4;
|
return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string NameOf(Attribute attribute) {
|
std::string NameOf(Attribute attribute) {
|
||||||
|
|
|
@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
|
||||||
ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd());
|
ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd());
|
||||||
|
|
||||||
for (const Inst& inst : block) {
|
for (const Inst& inst : block) {
|
||||||
const Opcode op{inst.Opcode()};
|
const Opcode op{inst.GetOpcode()};
|
||||||
ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
|
ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
|
||||||
if (TypeOf(op) != Type::Void) {
|
if (TypeOf(op) != Type::Void) {
|
||||||
ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
|
ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
|
||||||
|
|
|
@ -12,10 +12,10 @@ namespace Shader::IR {
|
||||||
|
|
||||||
std::string NameOf(Condition condition) {
|
std::string NameOf(Condition condition) {
|
||||||
std::string ret;
|
std::string ret;
|
||||||
if (condition.FlowTest() != FlowTest::T) {
|
if (condition.GetFlowTest() != FlowTest::T) {
|
||||||
ret = fmt::to_string(condition.FlowTest());
|
ret = fmt::to_string(condition.GetFlowTest());
|
||||||
}
|
}
|
||||||
const auto [pred, negated]{condition.Pred()};
|
const auto [pred, negated]{condition.GetPred()};
|
||||||
if (!ret.empty()) {
|
if (!ret.empty()) {
|
||||||
ret += '&';
|
ret += '&';
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,11 +30,11 @@ public:
|
||||||
|
|
||||||
auto operator<=>(const Condition&) const noexcept = default;
|
auto operator<=>(const Condition&) const noexcept = default;
|
||||||
|
|
||||||
[[nodiscard]] IR::FlowTest FlowTest() const noexcept {
|
[[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
|
||||||
return static_cast<IR::FlowTest>(flow_test);
|
return static_cast<IR::FlowTest>(flow_test);
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::pair<IR::Pred, bool> Pred() const noexcept {
|
[[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
|
||||||
return {static_cast<IR::Pred>(pred), pred_negated != 0};
|
return {static_cast<IR::Pred>(pred), pred_negated != 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
|
||||||
}
|
}
|
||||||
|
|
||||||
U1 IREmitter::Condition(IR::Condition cond) {
|
U1 IREmitter::Condition(IR::Condition cond) {
|
||||||
const FlowTest flow_test{cond.FlowTest()};
|
const FlowTest flow_test{cond.GetFlowTest()};
|
||||||
const auto [pred, is_negated]{cond.Pred()};
|
const auto [pred, is_negated]{cond.GetPred()};
|
||||||
return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
|
return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
namespace Shader::IR {
|
namespace Shader::IR {
|
||||||
namespace {
|
namespace {
|
||||||
void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
|
void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
|
||||||
if (inst && inst->Opcode() != opcode) {
|
if (inst && inst->GetOpcode() != opcode) {
|
||||||
throw LogicError("Invalid pseudo-instruction");
|
throw LogicError("Invalid pseudo-instruction");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
|
void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
|
||||||
if (inst->Opcode() != expected_opcode) {
|
if (inst->GetOpcode() != expected_opcode) {
|
||||||
throw LogicError("Undoing use of invalid pseudo-op");
|
throw LogicError("Undoing use of invalid pseudo-op");
|
||||||
}
|
}
|
||||||
inst = nullptr;
|
inst = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
|
||||||
|
if (!associated_insts) {
|
||||||
|
associated_insts = std::make_unique<AssociatedInsts>();
|
||||||
|
}
|
||||||
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
|
Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
|
||||||
|
@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) {
|
||||||
op = opcode;
|
op = opcode;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
|
|
||||||
if (!associated_insts) {
|
|
||||||
associated_insts = std::make_unique<AssociatedInsts>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Inst::Use(const Value& value) {
|
void Inst::Use(const Value& value) {
|
||||||
Inst* const inst{value.Inst()};
|
Inst* const inst{value.Inst()};
|
||||||
++inst->use_count;
|
++inst->use_count;
|
||||||
|
|
|
@ -46,7 +46,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the opcode this microinstruction represents.
|
/// Get the opcode this microinstruction represents.
|
||||||
[[nodiscard]] IR::Opcode Opcode() const noexcept {
|
[[nodiscard]] IR::Opcode GetOpcode() const noexcept {
|
||||||
return op;
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,7 +95,7 @@ public:
|
||||||
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
|
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
|
||||||
[[nodiscard]] FlagsType Flags() const noexcept {
|
[[nodiscard]] FlagsType Flags() const noexcept {
|
||||||
FlagsType ret;
|
FlagsType ret;
|
||||||
std::memcpy(&ret, &flags, sizeof(ret));
|
std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ constexpr std::array META_TABLE{
|
||||||
#define OPCODE(name_token, type_token, ...) \
|
#define OPCODE(name_token, type_token, ...) \
|
||||||
OpcodeMeta{ \
|
OpcodeMeta{ \
|
||||||
.name{#name_token}, \
|
.name{#name_token}, \
|
||||||
.type{type_token}, \
|
.type = type_token, \
|
||||||
.arg_types{__VA_ARGS__}, \
|
.arg_types{__VA_ARGS__}, \
|
||||||
},
|
},
|
||||||
#include "opcodes.inc"
|
#include "opcodes.inc"
|
||||||
|
|
|
@ -2,8 +2,6 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|
|
@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
|
||||||
Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
|
Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
|
||||||
|
|
||||||
bool Value::IsIdentity() const noexcept {
|
bool Value::IsIdentity() const noexcept {
|
||||||
return type == Type::Opaque && inst->Opcode() == Opcode::Identity;
|
return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Value::IsPhi() const noexcept {
|
bool Value::IsPhi() const noexcept {
|
||||||
return type == Type::Opaque && inst->Opcode() == Opcode::Phi;
|
return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Value::IsEmpty() const noexcept {
|
bool Value::IsEmpty() const noexcept {
|
||||||
|
|
|
@ -94,7 +94,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {}
|
explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
using U1 = TypedValue<Type::U1>;
|
using U1 = TypedValue<Type::U1>;
|
||||||
|
|
|
@ -34,41 +34,37 @@ struct Compare {
|
||||||
};
|
};
|
||||||
|
|
||||||
u32 BranchOffset(Location pc, Instruction inst) {
|
u32 BranchOffset(Location pc, Instruction inst) {
|
||||||
return pc.Offset() + inst.branch.Offset() + 8;
|
return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Split(Block* old_block, Block* new_block, Location pc) {
|
void Split(Block* old_block, Block* new_block, Location pc) {
|
||||||
if (pc <= old_block->begin || pc >= old_block->end) {
|
if (pc <= old_block->begin || pc >= old_block->end) {
|
||||||
throw InvalidArgument("Invalid address to split={}", pc);
|
throw InvalidArgument("Invalid address to split={}", pc);
|
||||||
}
|
}
|
||||||
*new_block = Block{
|
*new_block = Block{};
|
||||||
.begin{pc},
|
new_block->begin = pc;
|
||||||
.end{old_block->end},
|
new_block->end = old_block->end;
|
||||||
.end_class{old_block->end_class},
|
new_block->end_class = old_block->end_class;
|
||||||
.cond{old_block->cond},
|
new_block->cond = old_block->cond;
|
||||||
.stack{old_block->stack},
|
new_block->stack = old_block->stack;
|
||||||
.branch_true{old_block->branch_true},
|
new_block->branch_true = old_block->branch_true;
|
||||||
.branch_false{old_block->branch_false},
|
new_block->branch_false = old_block->branch_false;
|
||||||
.function_call{old_block->function_call},
|
new_block->function_call = old_block->function_call;
|
||||||
.return_block{old_block->return_block},
|
new_block->return_block = old_block->return_block;
|
||||||
.branch_reg{old_block->branch_reg},
|
new_block->branch_reg = old_block->branch_reg;
|
||||||
.branch_offset{old_block->branch_offset},
|
new_block->branch_offset = old_block->branch_offset;
|
||||||
.indirect_branches{std::move(old_block->indirect_branches)},
|
new_block->indirect_branches = std::move(old_block->indirect_branches);
|
||||||
};
|
|
||||||
*old_block = Block{
|
const Location old_begin{old_block->begin};
|
||||||
.begin{old_block->begin},
|
Stack old_stack{std::move(old_block->stack)};
|
||||||
.end{pc},
|
*old_block = Block{};
|
||||||
.end_class{EndClass::Branch},
|
old_block->begin = old_begin;
|
||||||
.cond{true},
|
old_block->end = pc;
|
||||||
.stack{std::move(old_block->stack)},
|
old_block->end_class = EndClass::Branch;
|
||||||
.branch_true{new_block},
|
old_block->cond = IR::Condition(true);
|
||||||
.branch_false{nullptr},
|
old_block->stack = std::move(old_stack); // old_stack is not read again; move instead of copying
|
||||||
.function_call{},
|
old_block->branch_true = new_block;
|
||||||
.return_block{},
|
old_block->branch_false = nullptr;
|
||||||
.branch_reg{},
|
|
||||||
.branch_offset{},
|
|
||||||
.indirect_branches{},
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Token OpcodeToken(Opcode opcode) {
|
Token OpcodeToken(Opcode opcode) {
|
||||||
|
@ -141,7 +137,7 @@ std::string NameOf(const Block& block) {
|
||||||
|
|
||||||
void Stack::Push(Token token, Location target) {
|
void Stack::Push(Token token, Location target) {
|
||||||
entries.push_back({
|
entries.push_back({
|
||||||
.token{token},
|
.token = token,
|
||||||
.target{target},
|
.target{target},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept {
|
||||||
}
|
}
|
||||||
|
|
||||||
Function::Function(ObjectPool<Block>& block_pool, Location start_address)
|
Function::Function(ObjectPool<Block>& block_pool, Location start_address)
|
||||||
: entrypoint{start_address}, labels{{
|
: entrypoint{start_address} {
|
||||||
.address{start_address},
|
Label& label{labels.emplace_back()};
|
||||||
.block{block_pool.Create(Block{
|
label.address = start_address;
|
||||||
.begin{start_address},
|
label.block = block_pool.Create(Block{});
|
||||||
.end{start_address},
|
label.block->begin = start_address;
|
||||||
.end_class{EndClass::Branch},
|
label.block->end = start_address;
|
||||||
.cond{true},
|
label.block->end_class = EndClass::Branch;
|
||||||
.stack{},
|
label.block->cond = IR::Condition(true);
|
||||||
.branch_true{nullptr},
|
label.block->branch_true = nullptr;
|
||||||
.branch_false{nullptr},
|
label.block->branch_false = nullptr;
|
||||||
.function_call{},
|
}
|
||||||
.return_block{},
|
|
||||||
.branch_reg{},
|
|
||||||
.branch_offset{},
|
|
||||||
.indirect_branches{},
|
|
||||||
})},
|
|
||||||
.stack{},
|
|
||||||
}} {}
|
|
||||||
|
|
||||||
CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address)
|
CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address)
|
||||||
: env{env_}, block_pool{block_pool_}, program_start{start_address} {
|
: env{env_}, block_pool{block_pool_}, program_start{start_address} {
|
||||||
|
@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
|
||||||
// Insert the function into the list if it doesn't exist
|
// Insert the function into the list if it doesn't exist
|
||||||
const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
|
const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
|
||||||
const bool exists{it != functions.end()};
|
const bool exists{it != functions.end()};
|
||||||
const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()};
|
const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
|
||||||
|
: functions.size()};
|
||||||
if (!exists) {
|
if (!exists) {
|
||||||
functions.emplace_back(block_pool, cal_pc);
|
functions.emplace_back(block_pool, cal_pc);
|
||||||
}
|
}
|
||||||
|
@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
|
||||||
}
|
}
|
||||||
// Create a virtual block and a conditional block
|
// Create a virtual block and a conditional block
|
||||||
Block* const conditional_block{block_pool.Create()};
|
Block* const conditional_block{block_pool.Create()};
|
||||||
Block virtual_block{
|
Block virtual_block{};
|
||||||
.begin{block->begin.Virtual()},
|
virtual_block.begin = block->begin.Virtual();
|
||||||
.end{block->begin.Virtual()},
|
virtual_block.end = block->begin.Virtual();
|
||||||
.end_class{EndClass::Branch},
|
virtual_block.end_class = EndClass::Branch;
|
||||||
.cond{cond},
|
virtual_block.stack = block->stack;
|
||||||
.stack{block->stack},
|
virtual_block.cond = cond;
|
||||||
.branch_true{conditional_block},
|
virtual_block.branch_true = conditional_block;
|
||||||
.branch_false{nullptr},
|
virtual_block.branch_false = nullptr;
|
||||||
.function_call{},
|
|
||||||
.return_block{},
|
|
||||||
.branch_reg{},
|
|
||||||
.branch_offset{},
|
|
||||||
.indirect_branches{},
|
|
||||||
};
|
|
||||||
// Save the contents of the visited block in the conditional block
|
// Save the contents of the visited block in the conditional block
|
||||||
*conditional_block = std::move(*block);
|
*conditional_block = std::move(*block);
|
||||||
// Impersonate the visited block with a virtual block
|
// Impersonate the visited block with a virtual block
|
||||||
|
@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst,
|
||||||
if (!is_absolute) {
|
if (!is_absolute) {
|
||||||
target += pc.Offset();
|
target += pc.Offset();
|
||||||
}
|
}
|
||||||
target += brx_table->branch_offset;
|
target += static_cast<unsigned int>(brx_table->branch_offset);
|
||||||
target += 8;
|
target += 8;
|
||||||
targets.push_back(target);
|
targets.push_back(target);
|
||||||
}
|
}
|
||||||
|
@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst,
|
||||||
for (const u32 target : targets) {
|
for (const u32 target : targets) {
|
||||||
Block* const branch{AddLabel(block, block->stack, target, function_id)};
|
Block* const branch{AddLabel(block, block->stack, target, function_id)};
|
||||||
block->indirect_branches.push_back({
|
block->indirect_branches.push_back({
|
||||||
.block{branch},
|
.block = branch,
|
||||||
.address{target},
|
.address = target,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
block->cond = IR::Condition{true};
|
block->cond = IR::Condition{true};
|
||||||
|
@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function
|
||||||
if (label_it != function.labels.end()) {
|
if (label_it != function.labels.end()) {
|
||||||
return label_it->block;
|
return label_it->block;
|
||||||
}
|
}
|
||||||
Block* const new_block{block_pool.Create(Block{
|
Block* const new_block{block_pool.Create()};
|
||||||
.begin{pc},
|
new_block->begin = pc;
|
||||||
.end{pc},
|
new_block->end = pc;
|
||||||
.end_class{EndClass::Branch},
|
new_block->end_class = EndClass::Branch;
|
||||||
.cond{true},
|
new_block->cond = IR::Condition(true);
|
||||||
.stack{stack},
|
new_block->stack = stack;
|
||||||
.branch_true{nullptr},
|
new_block->branch_true = nullptr;
|
||||||
.branch_false{nullptr},
|
new_block->branch_false = nullptr;
|
||||||
.function_call{},
|
|
||||||
.return_block{},
|
|
||||||
.branch_reg{},
|
|
||||||
.branch_offset{},
|
|
||||||
.indirect_branches{},
|
|
||||||
})};
|
|
||||||
function.labels.push_back(Label{
|
function.labels.push_back(Label{
|
||||||
.address{pc},
|
.address{pc},
|
||||||
.block{new_block},
|
.block = new_block,
|
||||||
.stack{std::move(stack)},
|
.stack{std::move(stack)},
|
||||||
});
|
});
|
||||||
return new_block;
|
return new_block;
|
||||||
|
|
|
@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
|
||||||
bit >>= 1;
|
bit >>= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return MaskValue{.mask{mask}, .value{value}};
|
return MaskValue{.mask = mask, .value = value};
|
||||||
}
|
}
|
||||||
|
|
||||||
struct InstEncoding {
|
struct InstEncoding {
|
||||||
|
@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{
|
||||||
#define INST(name, cute, encode) \
|
#define INST(name, cute, encode) \
|
||||||
InstEncoding{ \
|
InstEncoding{ \
|
||||||
.mask_value{MaskValueFromEncoding(encode)}, \
|
.mask_value{MaskValueFromEncoding(encode)}, \
|
||||||
.opcode{Opcode::name}, \
|
.opcode = Opcode::name, \
|
||||||
},
|
},
|
||||||
#include "maxwell.inc"
|
#include "maxwell.inc"
|
||||||
#undef INST
|
#undef INST
|
||||||
|
@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) {
|
||||||
const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
|
const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
|
||||||
if ((index & mask) == value) {
|
if ((index & mask) == value) {
|
||||||
encodings.at(element) = InstInfo{
|
encodings.at(element) = InstInfo{
|
||||||
.high_mask{static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT)},
|
.high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
|
||||||
.high_value{static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT)},
|
.high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
|
||||||
.opcode{encoding.opcode},
|
.opcode = encoding.opcode,
|
||||||
};
|
};
|
||||||
++element;
|
++element;
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,11 +97,11 @@ std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env
|
||||||
}
|
}
|
||||||
const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
|
const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
|
||||||
return IndirectBranchTableInfo{
|
return IndirectBranchTableInfo{
|
||||||
.cbuf_index{cbuf_index},
|
.cbuf_index = cbuf_index,
|
||||||
.cbuf_offset{cbuf_offset},
|
.cbuf_offset = cbuf_offset,
|
||||||
.num_entries{imnmx_immediate + 1},
|
.num_entries = imnmx_immediate + 1,
|
||||||
.branch_offset{brx_offset},
|
.branch_offset = brx_offset,
|
||||||
.branch_reg{brx_reg},
|
.branch_reg = brx_reg,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -558,7 +558,6 @@ private:
|
||||||
const Node label{goto_stmt->label};
|
const Node label{goto_stmt->label};
|
||||||
const u32 label_id{label->id};
|
const u32 label_id{label->id};
|
||||||
const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)};
|
const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)};
|
||||||
const auto type{label_nested_stmt->type};
|
|
||||||
|
|
||||||
Tree loop_body;
|
Tree loop_body;
|
||||||
loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
|
loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
|
||||||
|
@ -566,7 +565,7 @@ private:
|
||||||
Statement* const variable{pool.Create(Variable{}, label_id)};
|
Statement* const variable{pool.Create(Variable{}, label_id)};
|
||||||
Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
|
Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
|
||||||
UpdateTreeUp(loop_stmt);
|
UpdateTreeUp(loop_stmt);
|
||||||
const Node loop_node{body.insert(goto_stmt, *loop_stmt)};
|
body.insert(goto_stmt, *loop_stmt);
|
||||||
|
|
||||||
Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
|
Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
|
||||||
loop_stmt->children.push_front(*new_goto);
|
loop_stmt->children.push_front(*new_goto);
|
||||||
|
|
|
@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
|
||||||
const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
|
const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
|
||||||
|
|
||||||
const IR::FpControl control{
|
const IR::FpControl control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{CastFpRounding(dadd.fp_rounding)},
|
.rounding = CastFpRounding(dadd.fp_rounding),
|
||||||
.fmz_mode{IR::FmzMode::None},
|
.fmz_mode = IR::FmzMode::None,
|
||||||
};
|
};
|
||||||
|
|
||||||
v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
|
v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
|
||||||
|
|
|
@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s
|
||||||
const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
|
const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
|
||||||
|
|
||||||
const IR::FpControl control{
|
const IR::FpControl control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{CastFpRounding(dfma.fp_rounding)},
|
.rounding = CastFpRounding(dfma.fp_rounding),
|
||||||
.fmz_mode{IR::FmzMode::None},
|
.fmz_mode = IR::FmzMode::None,
|
||||||
};
|
};
|
||||||
|
|
||||||
v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
|
v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
|
||||||
|
|
|
@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
|
||||||
|
|
||||||
const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
|
const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
|
||||||
const IR::FpControl control{
|
const IR::FpControl control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{CastFpRounding(dmul.fp_rounding)},
|
.rounding = CastFpRounding(dmul.fp_rounding),
|
||||||
.fmz_mode{IR::FmzMode::None},
|
.fmz_mode = IR::FmzMode::None,
|
||||||
};
|
};
|
||||||
|
|
||||||
v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
|
v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
|
||||||
|
|
|
@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin
|
||||||
const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
|
const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
|
||||||
const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
|
const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
|
||||||
IR::FpControl control{
|
IR::FpControl control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{CastFpRounding(fp_rounding)},
|
.rounding = CastFpRounding(fp_rounding),
|
||||||
.fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
|
IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
|
||||||
if (sat) {
|
if (sat) {
|
||||||
|
|
|
@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o
|
||||||
} const fcmp{insn};
|
} const fcmp{insn};
|
||||||
|
|
||||||
const IR::F32 zero{v.ir.Imm32(0.0f)};
|
const IR::F32 zero{v.ir.Imm32(0.0f)};
|
||||||
const IR::F32 neg_zero{v.ir.Imm32(-0.0f)};
|
const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
|
||||||
const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}};
|
|
||||||
const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
|
const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
|
||||||
const IR::U32 src_reg{v.X(fcmp.src_reg)};
|
const IR::U32 src_reg{v.X(fcmp.src_reg)};
|
||||||
const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
|
const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
|
||||||
|
|
|
@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
|
||||||
const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
|
const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
|
||||||
const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
|
const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
|
||||||
const IR::FpControl control{
|
const IR::FpControl control{
|
||||||
.no_contraction{false},
|
.no_contraction = false,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
|
|
||||||
IR::U1 pred{v.ir.GetPred(fset.pred)};
|
IR::U1 pred{v.ir.GetPred(fset.pred)};
|
||||||
|
|
|
@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
|
||||||
|
|
||||||
const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
|
const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
|
||||||
IR::FpControl fp_control{
|
IR::FpControl fp_control{
|
||||||
.no_contraction{false},
|
.no_contraction = false,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
if (f2f.src_size != f2f.dst_size) {
|
if (f2f.src_size != f2f.dst_size) {
|
||||||
fp_control.rounding = CastFpRounding(f2f.rounding);
|
fp_control.rounding = CastFpRounding(f2f.rounding);
|
||||||
|
|
|
@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
|
||||||
fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
|
fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
|
||||||
}
|
}
|
||||||
const IR::FpControl fp_control{
|
const IR::FpControl fp_control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{fmz_mode},
|
.fmz_mode = fmz_mode,
|
||||||
};
|
};
|
||||||
const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
|
const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
|
||||||
const IR::F16F32F64 rounded_value{[&] {
|
const IR::F16F32F64 rounded_value{[&] {
|
||||||
|
@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
|
||||||
} else if (f2i.dest_format == DestFormat::I64) {
|
} else if (f2i.dest_format == DestFormat::I64) {
|
||||||
handled_special_case = true;
|
handled_special_case = true;
|
||||||
result = IR::U64{
|
result = IR::U64{
|
||||||
v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)};
|
v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!handled_special_case && is_signed) {
|
if (!handled_special_case && is_signed) {
|
||||||
if (bitsize != 64) {
|
if (bitsize != 64) {
|
||||||
result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
|
result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
|
||||||
} else {
|
} else {
|
||||||
result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)};
|
result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
|
||||||
|
|
||||||
void TranslatorVisitor::F2I_reg(u64 insn) {
|
void TranslatorVisitor::F2I_reg(u64 insn) {
|
||||||
union {
|
union {
|
||||||
|
u64 raw;
|
||||||
F2I base;
|
F2I base;
|
||||||
BitField<20, 8, IR::Reg> src_reg;
|
BitField<20, 8, IR::Reg> src_reg;
|
||||||
} const f2i{insn};
|
} const f2i{insn};
|
||||||
|
|
|
@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s
|
||||||
const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
|
const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
|
||||||
const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
|
const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
|
||||||
const IR::FpControl fp_control{
|
const IR::FpControl fp_control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{CastFpRounding(fp_rounding)},
|
.rounding = CastFpRounding(fp_rounding),
|
||||||
.fmz_mode{CastFmzMode(fmz_mode)},
|
.fmz_mode = CastFmzMode(fmz_mode),
|
||||||
};
|
};
|
||||||
IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
|
IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
|
||||||
if (fmz_mode == FmzMode::FMZ && !sat) {
|
if (fmz_mode == FmzMode::FMZ && !sat) {
|
||||||
|
|
|
@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
|
||||||
const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
|
const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
|
||||||
|
|
||||||
const IR::FpControl control{
|
const IR::FpControl control{
|
||||||
.no_contraction{false},
|
.no_contraction = false,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
|
IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
|
||||||
IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
|
IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
|
||||||
|
|
|
@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode
|
||||||
}
|
}
|
||||||
const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
|
const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
|
||||||
const IR::FpControl fp_control{
|
const IR::FpControl fp_control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{CastFpRounding(fp_rounding)},
|
.rounding = CastFpRounding(fp_rounding),
|
||||||
.fmz_mode{CastFmzMode(fmz_mode)},
|
.fmz_mode = CastFmzMode(fmz_mode),
|
||||||
};
|
};
|
||||||
IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
|
IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
|
||||||
if (fmz_mode == FmzMode::FMZ && !sat) {
|
if (fmz_mode == FmzMode::FMZ && !sat) {
|
||||||
|
|
|
@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
|
||||||
const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
|
const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
|
||||||
const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
|
const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
|
||||||
const IR::FpControl control{
|
const IR::FpControl control{
|
||||||
.no_contraction{false},
|
.no_contraction = false,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
|
|
||||||
const BooleanOp bop{fsetp.bop};
|
const BooleanOp bop{fsetp.bop};
|
||||||
|
|
|
@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) {
|
||||||
const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
|
const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
|
||||||
|
|
||||||
const IR::FpControl fp_control{
|
const IR::FpControl fp_control{
|
||||||
.no_contraction{false},
|
.no_contraction = false,
|
||||||
.rounding{CastFpRounding(fswzadd.round)},
|
.rounding = CastFpRounding(fswzadd.round),
|
||||||
.fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
|
|
||||||
const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
|
const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
|
||||||
|
|
|
@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool
|
||||||
rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
|
rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
|
||||||
|
|
||||||
const IR::FpControl fp_control{
|
const IR::FpControl fp_control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
|
IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
|
||||||
IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
|
IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
|
||||||
|
@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) {
|
||||||
BitField<20, 9, u64> low;
|
BitField<20, 9, u64> low;
|
||||||
} const hadd2{insn};
|
} const hadd2{insn};
|
||||||
|
|
||||||
const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
|
const u32 imm{
|
||||||
static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)};
|
static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
|
||||||
|
static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
|
||||||
HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
|
HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool
|
||||||
rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
|
rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
|
||||||
|
|
||||||
const IR::FpControl fp_control{
|
const IR::FpControl fp_control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{HalfPrecision2FmzMode(precision)},
|
.fmz_mode = HalfPrecision2FmzMode(precision),
|
||||||
};
|
};
|
||||||
IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
|
IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
|
||||||
IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
|
IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
|
||||||
|
@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) {
|
||||||
BitField<57, 2, HalfPrecision> precision;
|
BitField<57, 2, HalfPrecision> precision;
|
||||||
} const hfma2{insn};
|
} const hfma2{insn};
|
||||||
|
|
||||||
const u32 imm{static_cast<u32>(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) |
|
const u32 imm{
|
||||||
static_cast<u32>(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)};
|
static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
|
||||||
|
static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
|
||||||
|
|
||||||
HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
|
HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
|
||||||
GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
|
GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
|
||||||
|
|
|
@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo
|
||||||
rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
|
rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
|
||||||
|
|
||||||
const IR::FpControl fp_control{
|
const IR::FpControl fp_control{
|
||||||
.no_contraction{true},
|
.no_contraction = true,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{HalfPrecision2FmzMode(precision)},
|
.fmz_mode = HalfPrecision2FmzMode(precision),
|
||||||
};
|
};
|
||||||
IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
|
IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
|
||||||
IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
|
IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
|
||||||
|
@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) {
|
||||||
BitField<44, 1, u64> abs_a;
|
BitField<44, 1, u64> abs_a;
|
||||||
} const hmul2{insn};
|
} const hmul2{insn};
|
||||||
|
|
||||||
const u32 imm{static_cast<u32>(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) |
|
const u32 imm{
|
||||||
static_cast<u32>(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)};
|
static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
|
||||||
|
static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
|
||||||
HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
|
HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
|
||||||
Swizzle::H1_H0, ir.Imm32(imm));
|
Swizzle::H1_H0, ir.Imm32(imm));
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f
|
||||||
rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
|
rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
|
||||||
|
|
||||||
const IR::FpControl control{
|
const IR::FpControl control{
|
||||||
.no_contraction{false},
|
.no_contraction = false,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
|
|
||||||
IR::U1 pred{v.ir.GetPred(hset2.pred)};
|
IR::U1 pred{v.ir.GetPred(hset2.pred)};
|
||||||
|
@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) {
|
||||||
BitField<20, 9, u64> low;
|
BitField<20, 9, u64> low;
|
||||||
} const hset2{insn};
|
} const hset2{insn};
|
||||||
|
|
||||||
const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) |
|
const u32 imm{
|
||||||
static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)};
|
static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
|
||||||
|
static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
|
||||||
|
|
||||||
HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
|
HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
|
||||||
Swizzle::H1_H0);
|
Swizzle::H1_H0);
|
||||||
|
|
|
@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo
|
||||||
rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
|
rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
|
||||||
|
|
||||||
const IR::FpControl control{
|
const IR::FpControl control{
|
||||||
.no_contraction{false},
|
.no_contraction = false,
|
||||||
.rounding{IR::FpRounding::DontCare},
|
.rounding = IR::FpRounding::DontCare,
|
||||||
.fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
|
.fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
|
||||||
};
|
};
|
||||||
|
|
||||||
IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
|
IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
|
||||||
|
@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) {
|
||||||
BitField<20, 9, u64> low;
|
BitField<20, 9, u64> low;
|
||||||
} const hsetp2{insn};
|
} const hsetp2{insn};
|
||||||
|
|
||||||
const u32 imm{static_cast<u32>(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
|
const u32 imm{static_cast<u32>(hsetp2.low << 6) |
|
||||||
static_cast<u32>(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
|
static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
|
||||||
|
static_cast<u32>(hsetp2.high << 22) |
|
||||||
|
static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
|
||||||
|
|
||||||
HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
|
HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
|
||||||
hsetp2.h_and != 0);
|
hsetp2.h_and != 0);
|
||||||
|
|
|
@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
|
||||||
}
|
}
|
||||||
const IR::Value result{ir.UnpackUint2x32(value)};
|
const IR::Value result{ir.UnpackUint2x32(value)};
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
|
X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
|
||||||
}
|
}
|
||||||
const IR::Value result{ir.UnpackDouble2x32(value)};
|
const IR::Value result{ir.UnpackDouble2x32(value)};
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
|
X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
|
||||||
const auto [binding, offset_value]{CbufAddr(insn)};
|
const auto [binding, offset_value]{CbufAddr(insn)};
|
||||||
const bool unaligned{cbuf.unaligned != 0};
|
const bool unaligned{cbuf.unaligned != 0};
|
||||||
const u32 offset{offset_value.U32()};
|
const u32 offset{offset_value.U32()};
|
||||||
const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4};
|
const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
|
||||||
|
|
||||||
const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
|
const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
|
||||||
const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
|
const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
|
||||||
|
@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
|
||||||
BitField<20, 19, u64> value;
|
BitField<20, 19, u64> value;
|
||||||
BitField<56, 1, u64> is_negative;
|
BitField<56, 1, u64> is_negative;
|
||||||
} const imm{insn};
|
} const imm{insn};
|
||||||
const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0};
|
const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
|
||||||
const u32 value{static_cast<u32>(imm.value) << 12};
|
const u32 value{static_cast<u32>(imm.value) << 12};
|
||||||
return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
|
return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
|
||||||
} const iadd{insn};
|
} const iadd{insn};
|
||||||
|
|
||||||
const bool po{iadd.three_for_po == 3};
|
const bool po{iadd.three_for_po == 3};
|
||||||
const bool neg_a{!po && iadd.neg_a != 0};
|
|
||||||
if (!po && iadd.neg_b != 0) {
|
if (!po && iadd.neg_b != 0) {
|
||||||
op_b = v.ir.INeg(op_b);
|
op_b = v.ir.INeg(op_b);
|
||||||
}
|
}
|
||||||
|
|
|
@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
|
||||||
}
|
}
|
||||||
const IR::Value vector{v.ir.UnpackDouble2x32(value)};
|
const IR::Value vector{v.ir.UnpackDouble2x32(value)};
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)});
|
v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,7 @@ void TranslatorVisitor::LDC(u64 insn) {
|
||||||
}
|
}
|
||||||
const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
|
const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
|
X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,6 @@ std::pair<int, bool> GetSize(u64 insn) {
|
||||||
BitField<48, 3, Size> size;
|
BitField<48, 3, Size> size;
|
||||||
} const encoding{insn};
|
} const encoding{insn};
|
||||||
|
|
||||||
const Size nnn = encoding.size;
|
|
||||||
switch (encoding.size) {
|
switch (encoding.size) {
|
||||||
case Size::U8:
|
case Size::U8:
|
||||||
return {8, false};
|
return {8, false};
|
||||||
|
@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) {
|
||||||
case 32:
|
case 32:
|
||||||
case 64:
|
case 64:
|
||||||
case 128:
|
case 128:
|
||||||
if (!IR::IsAligned(dest, bit_size / 32)) {
|
if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
|
||||||
throw NotImplementedException("Unaligned destination register {}", dest);
|
throw NotImplementedException("Unaligned destination register {}", dest);
|
||||||
}
|
}
|
||||||
X(dest, ir.LoadLocal(word_offset));
|
X(dest, ir.LoadLocal(word_offset));
|
||||||
|
@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) {
|
||||||
break;
|
break;
|
||||||
case 64:
|
case 64:
|
||||||
case 128:
|
case 128:
|
||||||
if (!IR::IsAligned(dest, bit_size / 32)) {
|
if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
|
||||||
throw NotImplementedException("Unaligned destination register {}", dest);
|
throw NotImplementedException("Unaligned destination register {}", dest);
|
||||||
}
|
}
|
||||||
for (int element = 0; element < bit_size / 32; ++element) {
|
for (int element = 0; element < bit_size / 32; ++element) {
|
||||||
X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
|
X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) {
|
||||||
case 32:
|
case 32:
|
||||||
case 64:
|
case 64:
|
||||||
case 128:
|
case 128:
|
||||||
if (!IR::IsAligned(reg, bit_size / 32)) {
|
if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
|
||||||
throw NotImplementedException("Unaligned source register");
|
throw NotImplementedException("Unaligned source register");
|
||||||
}
|
}
|
||||||
ir.WriteLocal(word_offset, src);
|
ir.WriteLocal(word_offset, src);
|
||||||
|
|
|
@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) {
|
||||||
}
|
}
|
||||||
const IR::Value vector{ir.LoadGlobal64(address)};
|
const IR::Value vector{ir.LoadGlobal64(address)};
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
|
X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) {
|
||||||
}
|
}
|
||||||
const IR::Value vector{ir.LoadGlobal128(address)};
|
const IR::Value vector{ir.LoadGlobal128(address)};
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
|
X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
|
||||||
if (tex.dc != 0) {
|
if (tex.dc != 0) {
|
||||||
value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
|
value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
|
||||||
} else {
|
} else {
|
||||||
value = IR::F32{v.ir.CompositeExtract(sample, element)};
|
value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
|
||||||
}
|
}
|
||||||
v.F(dest_reg, value);
|
v.F(dest_reg, value);
|
||||||
++dest_reg;
|
++dest_reg;
|
||||||
|
|
|
@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{
|
||||||
R | G | B | A, //
|
R | G | B | A, //
|
||||||
};
|
};
|
||||||
|
|
||||||
void CheckAlignment(IR::Reg reg, int alignment) {
|
void CheckAlignment(IR::Reg reg, size_t alignment) {
|
||||||
if (!IR::IsAligned(reg, alignment)) {
|
if (!IR::IsAligned(reg, alignment)) {
|
||||||
throw NotImplementedException("Unaligned source register {}", reg);
|
throw NotImplementedException("Unaligned source register {}", reg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ union Encoding {
|
||||||
BitField<36, 13, u64> cbuf_offset;
|
BitField<36, 13, u64> cbuf_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
void CheckAlignment(IR::Reg reg, int alignment) {
|
void CheckAlignment(IR::Reg reg, size_t alignment) {
|
||||||
if (!IR::IsAligned(reg, alignment)) {
|
if (!IR::IsAligned(reg, alignment)) {
|
||||||
throw NotImplementedException("Unaligned source register {}", reg);
|
throw NotImplementedException("Unaligned source register {}", reg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,7 +56,7 @@ union Encoding {
|
||||||
BitField<53, 4, u64> encoding;
|
BitField<53, 4, u64> encoding;
|
||||||
};
|
};
|
||||||
|
|
||||||
void CheckAlignment(IR::Reg reg, int alignment) {
|
void CheckAlignment(IR::Reg reg, size_t alignment) {
|
||||||
if (!IR::IsAligned(reg, alignment)) {
|
if (!IR::IsAligned(reg, alignment)) {
|
||||||
throw NotImplementedException("Unaligned source register {}", reg);
|
throw NotImplementedException("Unaligned source register {}", reg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
|
||||||
if (((txq.mask >> element) & 1) == 0) {
|
if (((txq.mask >> element) & 1) == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)});
|
v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
|
||||||
++dest_reg;
|
++dest_reg;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) {
|
||||||
const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
|
const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
|
||||||
|
|
||||||
const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
|
const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
|
||||||
const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
|
|
||||||
const VideoWidth a_width{vsetp.src_a_width};
|
const VideoWidth a_width{vsetp.src_a_width};
|
||||||
const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
|
const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "shader_recompiler/frontend/ir/microinstruction.h"
|
#include "shader_recompiler/frontend/ir/microinstruction.h"
|
||||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||||
#include "shader_recompiler/frontend/ir/program.h"
|
#include "shader_recompiler/frontend/ir/program.h"
|
||||||
|
#include "shader_recompiler/ir_opt/passes.h"
|
||||||
#include "shader_recompiler/shader_info.h"
|
#include "shader_recompiler/shader_info.h"
|
||||||
|
|
||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
|
||||||
auto& cbufs{info.constant_buffer_descriptors};
|
auto& cbufs{info.constant_buffer_descriptors};
|
||||||
cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
|
cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
|
||||||
ConstantBufferDescriptor{
|
ConstantBufferDescriptor{
|
||||||
.index{index},
|
.index = index,
|
||||||
.count{1},
|
.count = 1,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void VisitUsages(Info& info, IR::Inst& inst) {
|
void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::CompositeConstructF16x2:
|
case IR::Opcode::CompositeConstructF16x2:
|
||||||
case IR::Opcode::CompositeConstructF16x3:
|
case IR::Opcode::CompositeConstructF16x3:
|
||||||
case IR::Opcode::CompositeConstructF16x4:
|
case IR::Opcode::CompositeConstructF16x4:
|
||||||
|
@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::GetCbufU8:
|
case IR::Opcode::GetCbufU8:
|
||||||
case IR::Opcode::GetCbufS8:
|
case IR::Opcode::GetCbufS8:
|
||||||
case IR::Opcode::UndefU8:
|
case IR::Opcode::UndefU8:
|
||||||
|
@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::GetCbufU16:
|
case IR::Opcode::GetCbufU16:
|
||||||
case IR::Opcode::GetCbufS16:
|
case IR::Opcode::GetCbufS16:
|
||||||
case IR::Opcode::UndefU16:
|
case IR::Opcode::UndefU16:
|
||||||
|
@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::UndefU64:
|
case IR::Opcode::UndefU64:
|
||||||
case IR::Opcode::LoadGlobalU8:
|
case IR::Opcode::LoadGlobalU8:
|
||||||
case IR::Opcode::LoadGlobalS8:
|
case IR::Opcode::LoadGlobalS8:
|
||||||
|
@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::DemoteToHelperInvocation:
|
case IR::Opcode::DemoteToHelperInvocation:
|
||||||
info.uses_demote_to_helper_invocation = true;
|
info.uses_demote_to_helper_invocation = true;
|
||||||
break;
|
break;
|
||||||
|
@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
} else {
|
} else {
|
||||||
throw NotImplementedException("Constant buffer with non-immediate index");
|
throw NotImplementedException("Constant buffer with non-immediate index");
|
||||||
}
|
}
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::GetCbufU8:
|
case IR::Opcode::GetCbufU8:
|
||||||
case IR::Opcode::GetCbufS8:
|
case IR::Opcode::GetCbufS8:
|
||||||
info.used_constant_buffer_types |= IR::Type::U8;
|
info.used_constant_buffer_types |= IR::Type::U8;
|
||||||
|
@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void VisitFpModifiers(Info& info, IR::Inst& inst) {
|
void VisitFpModifiers(Info& info, IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::FPAdd16:
|
case IR::Opcode::FPAdd16:
|
||||||
case IR::Opcode::FPFma16:
|
case IR::Opcode::FPFma16:
|
||||||
case IR::Opcode::FPMul16:
|
case IR::Opcode::FPMul16:
|
||||||
|
@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
|
||||||
info.stores_position |= header.vtg.omap_systemb.position != 0;
|
info.stores_position |= header.vtg.omap_systemb.position != 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
void CollectShaderInfoPass(Environment& env, IR::Program& program) {
|
void CollectShaderInfoPass(Environment& env, IR::Program& program) {
|
||||||
|
|
|
@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
|
||||||
}
|
}
|
||||||
if (is_lhs_immediate && !is_rhs_immediate) {
|
if (is_lhs_immediate && !is_rhs_immediate) {
|
||||||
IR::Inst* const rhs_inst{rhs.InstRecursive()};
|
IR::Inst* const rhs_inst{rhs.InstRecursive()};
|
||||||
if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
|
if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
|
||||||
const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
|
const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
|
||||||
inst.SetArg(0, rhs_inst->Arg(0));
|
inst.SetArg(0, rhs_inst->Arg(0));
|
||||||
inst.SetArg(1, IR::Value{combined});
|
inst.SetArg(1, IR::Value{combined});
|
||||||
|
@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
|
||||||
}
|
}
|
||||||
if (!is_lhs_immediate && is_rhs_immediate) {
|
if (!is_lhs_immediate && is_rhs_immediate) {
|
||||||
const IR::Inst* const lhs_inst{lhs.InstRecursive()};
|
const IR::Inst* const lhs_inst{lhs.InstRecursive()};
|
||||||
if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
|
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
|
||||||
const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
|
const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
|
||||||
inst.SetArg(0, lhs_inst->Arg(0));
|
inst.SetArg(0, lhs_inst->Arg(0));
|
||||||
inst.SetArg(1, IR::Value{combined});
|
inst.SetArg(1, IR::Value{combined});
|
||||||
|
@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
|
IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
|
||||||
if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) {
|
if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
|
||||||
|
lhs_shl->Arg(1) != IR::Value{16U}) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (lhs_shl->Arg(0).IsImmediate()) {
|
if (lhs_shl->Arg(0).IsImmediate()) {
|
||||||
|
@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
|
IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
|
||||||
IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
|
IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
|
||||||
if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) {
|
if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
|
if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
|
||||||
|
@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
|
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
|
||||||
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
|
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
|
||||||
if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) {
|
if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) {
|
if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
|
if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
|
||||||
|
@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) {
|
||||||
// ISub32 is generally used to subtract two constant buffers, compare and replace this with
|
// ISub32 is generally used to subtract two constant buffers, compare and replace this with
|
||||||
// zero if they equal.
|
// zero if they equal.
|
||||||
const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
|
const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
|
||||||
return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 &&
|
return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
|
||||||
a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
|
b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
|
||||||
|
a->Arg(1) == b->Arg(1);
|
||||||
}};
|
}};
|
||||||
IR::Inst* op_a{inst.Arg(0).InstRecursive()};
|
IR::Inst* op_a{inst.Arg(0).InstRecursive()};
|
||||||
IR::Inst* op_b{inst.Arg(1).InstRecursive()};
|
IR::Inst* op_b{inst.Arg(1).InstRecursive()};
|
||||||
|
@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// It's also possible a value is being added to a cbuf and then subtracted
|
// It's also possible a value is being added to a cbuf and then subtracted
|
||||||
if (op_b->Opcode() == IR::Opcode::IAdd32) {
|
if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
|
||||||
// Canonicalize local variables to simplify the following logic
|
// Canonicalize local variables to simplify the following logic
|
||||||
std::swap(op_a, op_b);
|
std::swap(op_a, op_b);
|
||||||
}
|
}
|
||||||
if (op_b->Opcode() != IR::Opcode::GetCbufU32) {
|
if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
IR::Inst* const inst_cbuf{op_b};
|
IR::Inst* const inst_cbuf{op_b};
|
||||||
if (op_a->Opcode() != IR::Opcode::IAdd32) {
|
if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
IR::Value add_op_a{op_a->Arg(0)};
|
IR::Value add_op_a{op_a->Arg(0)};
|
||||||
|
@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
|
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
|
||||||
IR::Inst* const rhs_op{rhs_value.InstRecursive()};
|
IR::Inst* const rhs_op{rhs_value.InstRecursive()};
|
||||||
if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) {
|
if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
|
||||||
|
rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const IR::Value recip_source{rhs_op->Arg(0)};
|
const IR::Value recip_source{rhs_op->Arg(0)};
|
||||||
|
@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
IR::Inst* const attr_a{recip_source.InstRecursive()};
|
IR::Inst* const attr_a{recip_source.InstRecursive()};
|
||||||
IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
|
IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
|
||||||
if (attr_a->Opcode() != IR::Opcode::GetAttribute ||
|
if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
|
||||||
attr_b->Opcode() != IR::Opcode::GetAttribute) {
|
attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
|
if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
|
||||||
|
@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
IR::Inst* const arg{value.InstRecursive()};
|
IR::Inst* const arg{value.InstRecursive()};
|
||||||
if (arg->Opcode() == IR::Opcode::LogicalNot) {
|
if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
|
||||||
inst.ReplaceUsesWith(arg->Arg(0));
|
inst.ReplaceUsesWith(arg->Arg(0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
IR::Inst* const arg_inst{value.InstRecursive()};
|
IR::Inst* const arg_inst{value.InstRecursive()};
|
||||||
if (arg_inst->Opcode() == reverse) {
|
if (arg_inst->GetOpcode() == reverse) {
|
||||||
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if constexpr (op == IR::Opcode::BitCastF32U32) {
|
if constexpr (op == IR::Opcode::BitCastF32U32) {
|
||||||
if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) {
|
if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
|
||||||
// Replace the bitcast with a typed constant buffer read
|
// Replace the bitcast with a typed constant buffer read
|
||||||
inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
|
inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
|
||||||
inst.SetArg(0, arg_inst->Arg(0));
|
inst.SetArg(0, arg_inst->Arg(0));
|
||||||
|
@ -338,7 +341,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
IR::Inst* const arg_inst{value.InstRecursive()};
|
IR::Inst* const arg_inst{value.InstRecursive()};
|
||||||
if (arg_inst->Opcode() == reverse) {
|
if (arg_inst->GetOpcode() == reverse) {
|
||||||
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
|
||||||
template <typename Func, size_t... I>
|
template <typename Func, size_t... I>
|
||||||
IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
|
IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
|
||||||
using Traits = LambdaTraits<decltype(func)>;
|
using Traits = LambdaTraits<decltype(func)>;
|
||||||
return IR::Value{func(Arg<Traits::ArgType<I>>(inst.Arg(I))...)};
|
return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
|
||||||
}
|
}
|
||||||
|
|
||||||
void FoldBranchConditional(IR::Inst& inst) {
|
void FoldBranchConditional(IR::Inst& inst) {
|
||||||
|
@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const IR::Inst* cond_inst{cond.InstRecursive()};
|
const IR::Inst* cond_inst{cond.InstRecursive()};
|
||||||
if (cond_inst->Opcode() == IR::Opcode::LogicalNot) {
|
if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) {
|
||||||
const IR::Value true_label{inst.Arg(1)};
|
const IR::Value true_label{inst.Arg(1)};
|
||||||
const IR::Value false_label{inst.Arg(2)};
|
const IR::Value false_label{inst.Arg(2)};
|
||||||
// Remove negation on the conditional (take the parameter out of LogicalNot) and swap
|
// Remove negation on the conditional (take the parameter out of LogicalNot) and swap
|
||||||
|
@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) {
|
||||||
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
|
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
|
||||||
IR::Opcode construct, u32 first_index) {
|
IR::Opcode construct, u32 first_index) {
|
||||||
IR::Inst* const inst{inst_value.InstRecursive()};
|
IR::Inst* const inst{inst_value.InstRecursive()};
|
||||||
if (inst->Opcode() == construct) {
|
if (inst->GetOpcode() == construct) {
|
||||||
return inst->Arg(first_index);
|
return inst->Arg(first_index);
|
||||||
}
|
}
|
||||||
if (inst->Opcode() != insert) {
|
if (inst->GetOpcode() != insert) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
IR::Value value_index{inst->Arg(2)};
|
IR::Value value_index{inst->Arg(2)};
|
||||||
|
@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::GetRegister:
|
case IR::Opcode::GetRegister:
|
||||||
return FoldGetRegister(inst);
|
return FoldGetRegister(inst);
|
||||||
case IR::Opcode::GetPred:
|
case IR::Opcode::GetPred:
|
||||||
|
|
|
@ -57,7 +57,7 @@ struct StorageInfo {
|
||||||
|
|
||||||
/// Returns true when the instruction is a global memory instruction
|
/// Returns true when the instruction is a global memory instruction
|
||||||
bool IsGlobalMemory(const IR::Inst& inst) {
|
bool IsGlobalMemory(const IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::LoadGlobalS8:
|
case IR::Opcode::LoadGlobalS8:
|
||||||
case IR::Opcode::LoadGlobalU8:
|
case IR::Opcode::LoadGlobalU8:
|
||||||
case IR::Opcode::LoadGlobalS16:
|
case IR::Opcode::LoadGlobalS16:
|
||||||
|
@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) {
|
||||||
|
|
||||||
/// Returns true when the instruction is a global memory instruction
|
/// Returns true when the instruction is a global memory instruction
|
||||||
bool IsGlobalMemoryWrite(const IR::Inst& inst) {
|
bool IsGlobalMemoryWrite(const IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::WriteGlobalS8:
|
case IR::Opcode::WriteGlobalS8:
|
||||||
case IR::Opcode::WriteGlobalU8:
|
case IR::Opcode::WriteGlobalU8:
|
||||||
case IR::Opcode::WriteGlobalS16:
|
case IR::Opcode::WriteGlobalS16:
|
||||||
|
@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
|
||||||
void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
|
void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
const IR::Value zero{u32{0}};
|
const IR::Value zero{u32{0}};
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::LoadGlobalS8:
|
case IR::Opcode::LoadGlobalS8:
|
||||||
case IR::Opcode::LoadGlobalU8:
|
case IR::Opcode::LoadGlobalU8:
|
||||||
case IR::Opcode::LoadGlobalS16:
|
case IR::Opcode::LoadGlobalS16:
|
||||||
|
@ -164,7 +164,7 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
|
||||||
inst.Invalidate();
|
inst.Invalidate();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode());
|
throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -184,7 +184,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
|
||||||
// This address is expected to either be a PackUint2x32 or a IAdd64
|
// This address is expected to either be a PackUint2x32 or a IAdd64
|
||||||
IR::Inst* addr_inst{addr.InstRecursive()};
|
IR::Inst* addr_inst{addr.InstRecursive()};
|
||||||
s32 imm_offset{0};
|
s32 imm_offset{0};
|
||||||
if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
|
if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
|
||||||
// If it's an IAdd64, get the immediate offset it is applying and grab the address
|
// If it's an IAdd64, get the immediate offset it is applying and grab the address
|
||||||
// instruction. This expects for the instruction to be canonicalized having the address on
|
// instruction. This expects for the instruction to be canonicalized having the address on
|
||||||
// the first argument and the immediate offset on the second one.
|
// the first argument and the immediate offset on the second one.
|
||||||
|
@ -200,7 +200,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
|
||||||
addr_inst = iadd_addr.Inst();
|
addr_inst = iadd_addr.Inst();
|
||||||
}
|
}
|
||||||
// With IAdd64 handled, now PackUint2x32 is expected without exceptions
|
// With IAdd64 handled, now PackUint2x32 is expected without exceptions
|
||||||
if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
|
if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
// PackUint2x32 is expected to be generated from a vector
|
// PackUint2x32 is expected to be generated from a vector
|
||||||
|
@ -210,20 +210,20 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
// This vector is expected to be a CompositeConstructU32x2
|
// This vector is expected to be a CompositeConstructU32x2
|
||||||
IR::Inst* const vector_inst{vector.InstRecursive()};
|
IR::Inst* const vector_inst{vector.InstRecursive()};
|
||||||
if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) {
|
if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
// Grab the first argument from the CompositeConstructU32x2, this is the low address.
|
// Grab the first argument from the CompositeConstructU32x2, this is the low address.
|
||||||
return LowAddrInfo{
|
return LowAddrInfo{
|
||||||
.value{IR::U32{vector_inst->Arg(0)}},
|
.value{IR::U32{vector_inst->Arg(0)}},
|
||||||
.imm_offset{imm_offset},
|
.imm_offset = imm_offset,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tries to track the storage buffer address used by a global memory instruction
|
/// Tries to track the storage buffer address used by a global memory instruction
|
||||||
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
|
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
|
||||||
const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
|
const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
|
||||||
if (inst->Opcode() != IR::Opcode::GetCbufU32) {
|
if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
const IR::Value index{inst->Arg(0)};
|
const IR::Value index{inst->Arg(0)};
|
||||||
|
@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
|
||||||
// NVN puts storage buffers in a specific range, we have to bias towards these addresses to
|
// NVN puts storage buffers in a specific range, we have to bias towards these addresses to
|
||||||
// avoid getting false positives
|
// avoid getting false positives
|
||||||
static constexpr Bias nvn_bias{
|
static constexpr Bias nvn_bias{
|
||||||
.index{0},
|
.index = 0,
|
||||||
.offset_begin{0x110},
|
.offset_begin = 0x110,
|
||||||
.offset_end{0x610},
|
.offset_end = 0x610,
|
||||||
};
|
};
|
||||||
// Track the low address of the instruction
|
// Track the low address of the instruction
|
||||||
const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
|
const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
|
||||||
|
@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
|
||||||
info.set.insert(*storage_buffer);
|
info.set.insert(*storage_buffer);
|
||||||
info.to_replace.push_back(StorageInst{
|
info.to_replace.push_back(StorageInst{
|
||||||
.storage_buffer{*storage_buffer},
|
.storage_buffer{*storage_buffer},
|
||||||
.inst{&inst},
|
.inst = &inst,
|
||||||
.block{&block},
|
.block = &block,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
|
||||||
/// Replace a global memory load instruction with its storage buffer equivalent
|
/// Replace a global memory load instruction with its storage buffer equivalent
|
||||||
void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
||||||
const IR::U32& offset) {
|
const IR::U32& offset) {
|
||||||
const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())};
|
const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
|
||||||
const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
|
const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
|
const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
|
||||||
inst.ReplaceUsesWith(value);
|
inst.ReplaceUsesWith(value);
|
||||||
|
@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
||||||
/// Replace a global memory write instruction with its storage buffer equivalent
|
/// Replace a global memory write instruction with its storage buffer equivalent
|
||||||
void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
||||||
const IR::U32& offset) {
|
const IR::U32& offset) {
|
||||||
const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())};
|
const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
|
||||||
const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
|
const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
|
block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
|
||||||
inst.Invalidate();
|
inst.Invalidate();
|
||||||
|
@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
|
||||||
/// Replace a global memory instruction with its storage buffer equivalent
|
/// Replace a global memory instruction with its storage buffer equivalent
|
||||||
void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
||||||
const IR::U32& offset) {
|
const IR::U32& offset) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::LoadGlobalS8:
|
case IR::Opcode::LoadGlobalS8:
|
||||||
case IR::Opcode::LoadGlobalU8:
|
case IR::Opcode::LoadGlobalU8:
|
||||||
case IR::Opcode::LoadGlobalS16:
|
case IR::Opcode::LoadGlobalS16:
|
||||||
|
@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
||||||
case IR::Opcode::WriteGlobal128:
|
case IR::Opcode::WriteGlobal128:
|
||||||
return ReplaceWrite(block, inst, storage_index, offset);
|
return ReplaceWrite(block, inst, storage_index, offset);
|
||||||
default:
|
default:
|
||||||
throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode());
|
throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
||||||
u32 storage_index{};
|
u32 storage_index{};
|
||||||
for (const StorageBufferAddr& storage_buffer : info.set) {
|
for (const StorageBufferAddr& storage_buffer : info.set) {
|
||||||
program.info.storage_buffers_descriptors.push_back({
|
program.info.storage_buffers_descriptors.push_back({
|
||||||
.cbuf_index{storage_buffer.index},
|
.cbuf_index = storage_buffer.index,
|
||||||
.cbuf_offset{storage_buffer.offset},
|
.cbuf_offset = storage_buffer.offset,
|
||||||
.count{1},
|
.count = 1,
|
||||||
.is_written{info.writes.contains(storage_buffer)},
|
.is_written{info.writes.contains(storage_buffer)},
|
||||||
});
|
});
|
||||||
++storage_index;
|
++storage_index;
|
||||||
|
|
|
@ -22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) {
|
||||||
inst->SetArg(i, arg.Inst()->Arg(0));
|
inst->SetArg(i, arg.Inst()->Arg(0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
|
if (inst->GetOpcode() == IR::Opcode::Identity ||
|
||||||
|
inst->GetOpcode() == IR::Opcode::Void) {
|
||||||
to_invalidate.push_back(&*inst);
|
to_invalidate.push_back(&*inst);
|
||||||
inst = block->Instructions().erase(inst);
|
inst = block->Instructions().erase(inst);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) {
|
||||||
void LowerFp16ToFp32(IR::Program& program) {
|
void LowerFp16ToFp32(IR::Program& program) {
|
||||||
for (IR::Block* const block : program.blocks) {
|
for (IR::Block* const block : program.blocks) {
|
||||||
for (IR::Inst& inst : block->Instructions()) {
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
inst.ReplaceOpcode(Replace(inst.Opcode()));
|
inst.ReplaceOpcode(Replace(inst.GetOpcode()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
|
[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
|
||||||
return inst.Opcode() == IR::Opcode::Phi;
|
return inst.GetOpcode() == IR::Opcode::Phi;
|
||||||
}
|
}
|
||||||
|
|
||||||
enum class Status {
|
enum class Status {
|
||||||
|
@ -278,7 +278,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::SetRegister:
|
case IR::Opcode::SetRegister:
|
||||||
if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
|
if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
|
||||||
pass.WriteVariable(reg, block, inst.Arg(1));
|
pass.WriteVariable(reg, block, inst.Arg(1));
|
||||||
|
|
|
@ -30,7 +30,7 @@ struct TextureInst {
|
||||||
using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
|
using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
|
||||||
|
|
||||||
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::BindlessImageSampleImplicitLod:
|
case IR::Opcode::BindlessImageSampleImplicitLod:
|
||||||
case IR::Opcode::BoundImageSampleImplicitLod:
|
case IR::Opcode::BoundImageSampleImplicitLod:
|
||||||
return IR::Opcode::ImageSampleImplicitLod;
|
return IR::Opcode::ImageSampleImplicitLod;
|
||||||
|
@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsBindless(const IR::Inst& inst) {
|
bool IsBindless(const IR::Inst& inst) {
|
||||||
switch (inst.Opcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::BindlessImageSampleImplicitLod:
|
case IR::Opcode::BindlessImageSampleImplicitLod:
|
||||||
case IR::Opcode::BindlessImageSampleExplicitLod:
|
case IR::Opcode::BindlessImageSampleExplicitLod:
|
||||||
case IR::Opcode::BindlessImageSampleDrefImplicitLod:
|
case IR::Opcode::BindlessImageSampleDrefImplicitLod:
|
||||||
|
@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) {
|
||||||
case IR::Opcode::BoundImageGradient:
|
case IR::Opcode::BoundImageGradient:
|
||||||
return false;
|
return false;
|
||||||
default:
|
default:
|
||||||
throw InvalidArgument("Invalid opcode {}", inst.Opcode());
|
throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
|
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
|
||||||
if (inst->Opcode() != IR::Opcode::GetCbufU32) {
|
if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
const IR::Value index{inst->Arg(0)};
|
const IR::Value index{inst->Arg(0)};
|
||||||
|
@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
|
||||||
addr = *track_addr;
|
addr = *track_addr;
|
||||||
} else {
|
} else {
|
||||||
addr = ConstBufferAddr{
|
addr = ConstBufferAddr{
|
||||||
.index{env.TextureBoundBuffer()},
|
.index = env.TextureBoundBuffer(),
|
||||||
.offset{inst.Arg(0).U32()},
|
.offset = inst.Arg(0).U32(),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
return TextureInst{
|
return TextureInst{
|
||||||
.cbuf{addr},
|
.cbuf{addr},
|
||||||
.inst{&inst},
|
.inst = &inst,
|
||||||
.block{block},
|
.block = block,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) {
|
||||||
|
|
||||||
const auto& cbuf{texture_inst.cbuf};
|
const auto& cbuf{texture_inst.cbuf};
|
||||||
auto flags{inst->Flags<IR::TextureInstInfo>()};
|
auto flags{inst->Flags<IR::TextureInstInfo>()};
|
||||||
switch (inst->Opcode()) {
|
switch (inst->GetOpcode()) {
|
||||||
case IR::Opcode::ImageQueryDimensions:
|
case IR::Opcode::ImageQueryDimensions:
|
||||||
flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset));
|
flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset));
|
||||||
inst->SetFlags(flags);
|
inst->SetFlags(flags);
|
||||||
|
@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) {
|
||||||
u32 index;
|
u32 index;
|
||||||
if (flags.type == TextureType::Buffer) {
|
if (flags.type == TextureType::Buffer) {
|
||||||
index = descriptors.Add(TextureBufferDescriptor{
|
index = descriptors.Add(TextureBufferDescriptor{
|
||||||
.cbuf_index{cbuf.index},
|
.cbuf_index = cbuf.index,
|
||||||
.cbuf_offset{cbuf.offset},
|
.cbuf_offset = cbuf.offset,
|
||||||
.count{1},
|
.count = 1,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
index = descriptors.Add(TextureDescriptor{
|
index = descriptors.Add(TextureDescriptor{
|
||||||
.type{flags.type},
|
.type = flags.type,
|
||||||
.cbuf_index{cbuf.index},
|
.cbuf_index = cbuf.index,
|
||||||
.cbuf_offset{cbuf.offset},
|
.cbuf_offset = cbuf.offset,
|
||||||
.count{1},
|
.count = 1,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
inst->SetArg(0, IR::Value{index});
|
inst->SetArg(0, IR::Value{index});
|
||||||
|
|
|
@ -14,14 +14,14 @@ namespace Shader::Optimization {
|
||||||
static void ValidateTypes(const IR::Program& program) {
|
static void ValidateTypes(const IR::Program& program) {
|
||||||
for (const auto& block : program.blocks) {
|
for (const auto& block : program.blocks) {
|
||||||
for (const IR::Inst& inst : *block) {
|
for (const IR::Inst& inst : *block) {
|
||||||
if (inst.Opcode() == IR::Opcode::Phi) {
|
if (inst.GetOpcode() == IR::Opcode::Phi) {
|
||||||
// Skip validation on phi nodes
|
// Skip validation on phi nodes
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const size_t num_args{inst.NumArgs()};
|
const size_t num_args{inst.NumArgs()};
|
||||||
for (size_t i = 0; i < num_args; ++i) {
|
for (size_t i = 0; i < num_args; ++i) {
|
||||||
const IR::Type t1{inst.Arg(i).Type()};
|
const IR::Type t1{inst.Arg(i).Type()};
|
||||||
const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
|
const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
|
||||||
if (!IR::AreTypesCompatible(t1, t2)) {
|
if (!IR::AreTypesCompatible(t1, t2)) {
|
||||||
throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
|
throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,10 +17,12 @@ struct Noisy {
|
||||||
Noisy& operator=(Noisy&& rhs) noexcept {
|
Noisy& operator=(Noisy&& rhs) noexcept {
|
||||||
state = "Move assigned";
|
state = "Move assigned";
|
||||||
rhs.state = "Moved away";
|
rhs.state = "Moved away";
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
Noisy(const Noisy&) : state{"Copied constructed"} {}
|
Noisy(const Noisy&) : state{"Copied constructed"} {}
|
||||||
Noisy& operator=(const Noisy&) {
|
Noisy& operator=(const Noisy&) {
|
||||||
state = "Copied assigned";
|
state = "Copied assigned";
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string state;
|
std::string state;
|
||||||
|
|
|
@ -203,7 +203,7 @@ add_library(video_core STATIC
|
||||||
create_target_directory_groups(video_core)
|
create_target_directory_groups(video_core)
|
||||||
|
|
||||||
target_link_libraries(video_core PUBLIC common core)
|
target_link_libraries(video_core PUBLIC common core)
|
||||||
target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak)
|
target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak)
|
||||||
|
|
||||||
if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
|
if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
|
||||||
add_dependencies(video_core ffmpeg-build)
|
add_dependencies(video_core ffmpeg-build)
|
||||||
|
|
|
@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
|
||||||
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
|
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
|
||||||
.pDynamicStates = dynamic_states.data(),
|
.pDynamicStates = dynamic_states.data(),
|
||||||
};
|
};
|
||||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
[[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.requiredSubgroupSize = GuestWarpSize,
|
.requiredSubgroupSize = GuestWarpSize,
|
||||||
|
@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
|
||||||
if (!spv_modules[stage]) {
|
if (!spv_modules[stage]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
[[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
|
[[maybe_unused]] auto& stage_ci =
|
||||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
|
||||||
.pNext = nullptr,
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
.flags = 0,
|
.pNext = nullptr,
|
||||||
.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
|
.flags = 0,
|
||||||
.module = *spv_modules[stage],
|
.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
|
||||||
.pName = "main",
|
.module = *spv_modules[stage],
|
||||||
.pSpecializationInfo = nullptr,
|
.pName = "main",
|
||||||
});
|
.pSpecializationInfo = nullptr,
|
||||||
|
});
|
||||||
/*
|
/*
|
||||||
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
|
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
|
||||||
stage_ci.pNext = &subgroup_size_ci;
|
stage_ci.pNext = &subgroup_size_ci;
|
||||||
|
|
|
@ -47,7 +47,7 @@ auto MakeSpan(Container& container) {
|
||||||
return std::span(container.data(), container.size());
|
return std::span(container.data(), container.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 MakeCbufKey(u32 index, u32 offset) {
|
static u64 MakeCbufKey(u32 index, u32 offset) {
|
||||||
return (static_cast<u64>(index) << 32) | offset;
|
return (static_cast<u64>(index) << 32) | offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
|
||||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
|
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
|
||||||
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
|
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
|
||||||
.generic_input_types{},
|
.generic_input_types{},
|
||||||
|
.fixed_state_point_size{},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -748,7 +749,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||||
Shader::Environment& env{*envs[env_index]};
|
Shader::Environment& env{*envs[env_index]};
|
||||||
++env_index;
|
++env_index;
|
||||||
|
|
||||||
const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)};
|
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
|
||||||
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
|
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
|
||||||
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
|
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,8 +2,6 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
|
|
|
@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
|
[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
|
||||||
std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
|
std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
|
||||||
std::vector<VkBufferCopy> result(copies.size());
|
std::vector<VkBufferCopy> result(copies.size());
|
||||||
std::ranges::transform(
|
std::ranges::transform(
|
||||||
|
|
Loading…
Reference in a new issue