// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"

namespace Shader::Maxwell {
namespace {
enum class LoadSize : u64 {
    U8,  // Zero-extend
    S8,  // Sign-extend
    U16, // Zero-extend
    S16, // Sign-extend
    B32,
    B64,
    B128,
    U128, // ???
};

enum class StoreSize : u64 {
    U8,  // Zero-extend
    S8,  // Sign-extend
    U16, // Zero-extend
    S16, // Sign-extend
    B32,
    B64,
    B128,
};

// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
enum class LoadCache : u64 {
    CA, // Cache at all levels, likely to be accessed again
    CG, // Cache at global level (cache in L2 and below, not L1)
    CI, // ???
    CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
};

// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
enum class StoreCache : u64 {
    WB, // Cache write-back all coherent levels
    CG, // Cache at global level
    CS, // Cache streaming, likely to be accessed once
    WT, // Cache write-through (to system memory)
};
IR::U64 Address(TranslatorVisitor& v, u64 insn) {
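    // Instruction encoding: addr_reg in bits 8-15; a 24-bit immediate in bits
    // 20-43, viewed both sign-extended (addr_offset) and unsigned
    // (rz_addr_offset); the .E extended-address flag in bit 45.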
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 24, s64> addr_offset;
        BitField<20, 24, u64> rz_addr_offset;
        BitField<45, 1, u64> e;
    } const mem{insn};

    const IR::U64 address{[&]() -> IR::U64 {
        if (mem.e == 0) {
            // LDG/STG without .E uses a 32-bit pointer, zero-extend it
            return v.ir.UConvert(64, v.X(mem.addr_reg));
        }
        if (!IR::IsAligned(mem.addr_reg, 2)) {
            throw NotImplementedException("Unaligned address register");
        }
        // Pack two registers to build the 64-bit address
        return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
    }()};
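    // The two BitField views alias the same immediate bits; select the unsigned
    // view for RZ (absolute) addressing and the sign-extended view otherwise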
    const u64 addr_offset{[&]() -> u64 {
        if (mem.addr_reg == IR::Reg::RZ) {
            // When RZ is used, the address is an absolute address
            return static_cast<u64>(mem.rz_addr_offset.Value());
        } else {
            return static_cast<u64>(mem.addr_offset.Value());
        }
    }()};
    // Apply the offset
    return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
}
} // Anonymous namespace

void TranslatorVisitor::LDG(u64 insn) {
    // LDG loads global memory into registers
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<46, 2, LoadCache> cache;
        BitField<48, 3, LoadSize> size;
    } const ldg{insn};
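    // Note: the cache hint is decoded here but not yet forwarded to the IR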

    // Pointer to load data from
    const IR::U64 address{Address(*this, insn)};
    const IR::Reg dest_reg{ldg.dest_reg};
    switch (ldg.size) {
    case LoadSize::U8:
        X(dest_reg, ir.LoadGlobalU8(address));
        break;
    case LoadSize::S8:
        X(dest_reg, ir.LoadGlobalS8(address));
        break;
    case LoadSize::U16:
        X(dest_reg, ir.LoadGlobalU16(address));
        break;
    case LoadSize::S16:
        X(dest_reg, ir.LoadGlobalS16(address));
        break;
    case LoadSize::B32:
        X(dest_reg, ir.LoadGlobal32(address));
        break;
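    // B64/B128 loads return a vector of 32-bit parts that is unpacked into
    // consecutive destination registers, so the base register must be aligned
    // to the element count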
    case LoadSize::B64: {
        if (!IR::IsAligned(dest_reg, 2)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.LoadGlobal64(address)};
        for (int i = 0; i < 2; ++i) {
            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
        }
        break;
    }
    case LoadSize::B128: {
        if (!IR::IsAligned(dest_reg, 4)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.LoadGlobal128(address)};
        for (int i = 0; i < 4; ++i) {
            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
        }
        break;
    }
    case LoadSize::U128:
        throw NotImplementedException("LDG U.128");
    default:
        throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
    }
}

void TranslatorVisitor::STG(u64 insn) {
    // STG stores registers into global memory.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> data_reg;
        BitField<46, 2, StoreCache> cache;
        BitField<48, 3, StoreSize> size;
    } const stg{insn};

    // Pointer to store data into
    const IR::U64 address{Address(*this, insn)};
    const IR::Reg data_reg{stg.data_reg};
    switch (stg.size) {
    case StoreSize::U8:
        ir.WriteGlobalU8(address, X(data_reg));
        break;
    case StoreSize::S8:
        ir.WriteGlobalS8(address, X(data_reg));
        break;
    case StoreSize::U16:
        ir.WriteGlobalU16(address, X(data_reg));
        break;
    case StoreSize::S16:
        ir.WriteGlobalS16(address, X(data_reg));
        break;
    case StoreSize::B32:
        ir.WriteGlobal32(address, X(data_reg));
        break;
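    // Wide stores mirror wide loads: consecutive source registers are packed
    // into a vector, with the same base register alignment requirement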
    case StoreSize::B64: {
        if (!IR::IsAligned(data_reg, 2)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
        ir.WriteGlobal64(address, vector);
        break;
    }
    case StoreSize::B128: {
        if (!IR::IsAligned(data_reg, 4)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{
            ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
        ir.WriteGlobal128(address, vector);
        break;
    }
    default:
        throw NotImplementedException("Invalid STG size {}", stg.size.Value());
    }
}

} // namespace Shader::Maxwell