1153 lines
36 KiB
C++
1153 lines
36 KiB
C++
// Copyright 2014 Citra Emulator Project
|
|
// Licensed under GPLv2 or any later version
|
|
// Refer to the license.txt file included.
|
|
|
|
#pragma once
|
|
|
|
#include <array>
|
|
#include <cmath>
|
|
#include <cstddef>
|
|
#include <string>
|
|
|
|
#include "common/assert.h"
|
|
#include "common/bit_field.h"
|
|
#include "common/common_funcs.h"
|
|
#include "common/common_types.h"
|
|
#include "common/logging/log.h"
|
|
#include "common/vector_math.h"
|
|
|
|
namespace Pica {
|
|
|
|
// Returns index corresponding to the Regs member labeled by field_name
|
|
// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
|
|
// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])).
|
|
// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
|
|
// Hopefully, this will be fixed sometime in the future.
|
|
// For lack of better alternatives, we currently hardcode the offsets when constant
|
|
// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
|
|
// will then make sure the offsets indeed match the automatically calculated ones).
|
|
#define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32))
|
|
#if defined(_MSC_VER)
|
|
#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
|
|
#else
|
|
// NOTE: Yeah, hacking in a static_assert here just to workaround the lacking MSVC compiler
|
|
// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX
|
|
// and then performs a (no-op) cast to size_t iff the second argument matches the expected
|
|
// field offset. Otherwise, the compiler will fail to compile this code.
|
|
#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
|
|
((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name))
|
|
#endif // _MSC_VER
|
|
|
|
struct Regs {
|
|
|
|
INSERT_PADDING_WORDS(0x10);
|
|
|
|
u32 trigger_irq;
|
|
|
|
INSERT_PADDING_WORDS(0x2f);
|
|
|
|
enum class CullMode : u32 {
|
|
// Select which polygons are considered to be "frontfacing".
|
|
KeepAll = 0,
|
|
KeepClockWise = 1,
|
|
KeepCounterClockWise = 2,
|
|
// TODO: What does the third value imply?
|
|
};
|
|
|
|
union {
|
|
BitField<0, 2, CullMode> cull_mode;
|
|
};
|
|
|
|
BitField<0, 24, u32> viewport_size_x;
|
|
|
|
INSERT_PADDING_WORDS(0x1);
|
|
|
|
BitField<0, 24, u32> viewport_size_y;
|
|
|
|
INSERT_PADDING_WORDS(0x9);
|
|
|
|
BitField<0, 24, u32> viewport_depth_range; // float24
|
|
BitField<0, 24, u32> viewport_depth_far_plane; // float24
|
|
|
|
INSERT_PADDING_WORDS(0x1);
|
|
|
|
union VSOutputAttributes {
|
|
// Maps components of output vertex attributes to semantics
|
|
enum Semantic : u32
|
|
{
|
|
POSITION_X = 0,
|
|
POSITION_Y = 1,
|
|
POSITION_Z = 2,
|
|
POSITION_W = 3,
|
|
|
|
QUATERNION_X = 4,
|
|
QUATERNION_Y = 5,
|
|
QUATERNION_Z = 6,
|
|
QUATERNION_W = 7,
|
|
|
|
COLOR_R = 8,
|
|
COLOR_G = 9,
|
|
COLOR_B = 10,
|
|
COLOR_A = 11,
|
|
|
|
TEXCOORD0_U = 12,
|
|
TEXCOORD0_V = 13,
|
|
TEXCOORD1_U = 14,
|
|
TEXCOORD1_V = 15,
|
|
|
|
// TODO: Not verified
|
|
VIEW_X = 18,
|
|
VIEW_Y = 19,
|
|
VIEW_Z = 20,
|
|
|
|
TEXCOORD2_U = 22,
|
|
TEXCOORD2_V = 23,
|
|
|
|
INVALID = 31,
|
|
};
|
|
|
|
BitField< 0, 5, Semantic> map_x;
|
|
BitField< 8, 5, Semantic> map_y;
|
|
BitField<16, 5, Semantic> map_z;
|
|
BitField<24, 5, Semantic> map_w;
|
|
} vs_output_attributes[7];
|
|
|
|
INSERT_PADDING_WORDS(0x11);
|
|
|
|
union {
|
|
BitField< 0, 16, u32> x;
|
|
BitField<16, 16, u32> y;
|
|
} viewport_corner;
|
|
|
|
INSERT_PADDING_WORDS(0x17);
|
|
|
|
struct TextureConfig {
|
|
enum WrapMode : u32 {
|
|
ClampToEdge = 0,
|
|
ClampToBorder = 1,
|
|
Repeat = 2,
|
|
MirroredRepeat = 3,
|
|
};
|
|
|
|
enum TextureFilter : u32 {
|
|
Nearest = 0,
|
|
Linear = 1
|
|
};
|
|
|
|
union {
|
|
BitField< 0, 8, u32> r;
|
|
BitField< 8, 8, u32> g;
|
|
BitField<16, 8, u32> b;
|
|
BitField<24, 8, u32> a;
|
|
} border_color;
|
|
|
|
union {
|
|
BitField< 0, 16, u32> height;
|
|
BitField<16, 16, u32> width;
|
|
};
|
|
|
|
union {
|
|
BitField< 1, 1, TextureFilter> mag_filter;
|
|
BitField< 2, 1, TextureFilter> min_filter;
|
|
BitField< 8, 2, WrapMode> wrap_t;
|
|
BitField<12, 2, WrapMode> wrap_s;
|
|
};
|
|
|
|
INSERT_PADDING_WORDS(0x1);
|
|
|
|
u32 address;
|
|
|
|
u32 GetPhysicalAddress() const {
|
|
return DecodeAddressRegister(address);
|
|
}
|
|
|
|
// texture1 and texture2 store the texture format directly after the address
|
|
// whereas texture0 inserts some additional flags inbetween.
|
|
// Hence, we store the format separately so that all other parameters can be described
|
|
// in a single structure.
|
|
};
|
|
|
|
enum class TextureFormat : u32 {
|
|
RGBA8 = 0,
|
|
RGB8 = 1,
|
|
RGB5A1 = 2,
|
|
RGB565 = 3,
|
|
RGBA4 = 4,
|
|
IA8 = 5,
|
|
RG8 = 6, ///< @note Also called HILO8 in 3DBrew.
|
|
I8 = 7,
|
|
A8 = 8,
|
|
IA4 = 9,
|
|
I4 = 10,
|
|
A4 = 11,
|
|
ETC1 = 12, // compressed
|
|
ETC1A4 = 13, // compressed
|
|
};
|
|
|
|
enum class LogicOp : u32 {
|
|
Clear = 0,
|
|
And = 1,
|
|
AndReverse = 2,
|
|
Copy = 3,
|
|
Set = 4,
|
|
CopyInverted = 5,
|
|
NoOp = 6,
|
|
Invert = 7,
|
|
Nand = 8,
|
|
Or = 9,
|
|
Nor = 10,
|
|
Xor = 11,
|
|
Equiv = 12,
|
|
AndInverted = 13,
|
|
OrReverse = 14,
|
|
OrInverted = 15,
|
|
};
|
|
|
|
static unsigned NibblesPerPixel(TextureFormat format) {
|
|
switch (format) {
|
|
case TextureFormat::RGBA8:
|
|
return 8;
|
|
|
|
case TextureFormat::RGB8:
|
|
return 6;
|
|
|
|
case TextureFormat::RGB5A1:
|
|
case TextureFormat::RGB565:
|
|
case TextureFormat::RGBA4:
|
|
case TextureFormat::IA8:
|
|
case TextureFormat::RG8:
|
|
return 4;
|
|
|
|
case TextureFormat::I4:
|
|
case TextureFormat::A4:
|
|
return 1;
|
|
|
|
case TextureFormat::I8:
|
|
case TextureFormat::A8:
|
|
case TextureFormat::IA4:
|
|
default: // placeholder for yet unknown formats
|
|
return 2;
|
|
}
|
|
}
|
|
|
|
union {
|
|
BitField< 0, 1, u32> texture0_enable;
|
|
BitField< 1, 1, u32> texture1_enable;
|
|
BitField< 2, 1, u32> texture2_enable;
|
|
};
|
|
TextureConfig texture0;
|
|
INSERT_PADDING_WORDS(0x8);
|
|
BitField<0, 4, TextureFormat> texture0_format;
|
|
INSERT_PADDING_WORDS(0x2);
|
|
TextureConfig texture1;
|
|
BitField<0, 4, TextureFormat> texture1_format;
|
|
INSERT_PADDING_WORDS(0x2);
|
|
TextureConfig texture2;
|
|
BitField<0, 4, TextureFormat> texture2_format;
|
|
INSERT_PADDING_WORDS(0x21);
|
|
|
|
struct FullTextureConfig {
|
|
const bool enabled;
|
|
const TextureConfig config;
|
|
const TextureFormat format;
|
|
};
|
|
const std::array<FullTextureConfig, 3> GetTextures() const {
|
|
return {{
|
|
{ texture0_enable.ToBool(), texture0, texture0_format },
|
|
{ texture1_enable.ToBool(), texture1, texture1_format },
|
|
{ texture2_enable.ToBool(), texture2, texture2_format }
|
|
}};
|
|
}
|
|
|
|
// 0xc0-0xff: Texture Combiner (akin to glTexEnv)
|
|
struct TevStageConfig {
|
|
enum class Source : u32 {
|
|
PrimaryColor = 0x0,
|
|
PrimaryFragmentColor = 0x1,
|
|
SecondaryFragmentColor = 0x2,
|
|
|
|
Texture0 = 0x3,
|
|
Texture1 = 0x4,
|
|
Texture2 = 0x5,
|
|
Texture3 = 0x6,
|
|
|
|
PreviousBuffer = 0xd,
|
|
Constant = 0xe,
|
|
Previous = 0xf,
|
|
};
|
|
|
|
enum class ColorModifier : u32 {
|
|
SourceColor = 0x0,
|
|
OneMinusSourceColor = 0x1,
|
|
SourceAlpha = 0x2,
|
|
OneMinusSourceAlpha = 0x3,
|
|
SourceRed = 0x4,
|
|
OneMinusSourceRed = 0x5,
|
|
|
|
SourceGreen = 0x8,
|
|
OneMinusSourceGreen = 0x9,
|
|
|
|
SourceBlue = 0xc,
|
|
OneMinusSourceBlue = 0xd,
|
|
};
|
|
|
|
enum class AlphaModifier : u32 {
|
|
SourceAlpha = 0x0,
|
|
OneMinusSourceAlpha = 0x1,
|
|
SourceRed = 0x2,
|
|
OneMinusSourceRed = 0x3,
|
|
SourceGreen = 0x4,
|
|
OneMinusSourceGreen = 0x5,
|
|
SourceBlue = 0x6,
|
|
OneMinusSourceBlue = 0x7,
|
|
};
|
|
|
|
enum class Operation : u32 {
|
|
Replace = 0,
|
|
Modulate = 1,
|
|
Add = 2,
|
|
AddSigned = 3,
|
|
Lerp = 4,
|
|
Subtract = 5,
|
|
Dot3_RGB = 6,
|
|
|
|
MultiplyThenAdd = 8,
|
|
AddThenMultiply = 9,
|
|
};
|
|
|
|
union {
|
|
BitField< 0, 4, Source> color_source1;
|
|
BitField< 4, 4, Source> color_source2;
|
|
BitField< 8, 4, Source> color_source3;
|
|
BitField<16, 4, Source> alpha_source1;
|
|
BitField<20, 4, Source> alpha_source2;
|
|
BitField<24, 4, Source> alpha_source3;
|
|
};
|
|
|
|
union {
|
|
BitField< 0, 4, ColorModifier> color_modifier1;
|
|
BitField< 4, 4, ColorModifier> color_modifier2;
|
|
BitField< 8, 4, ColorModifier> color_modifier3;
|
|
BitField<12, 3, AlphaModifier> alpha_modifier1;
|
|
BitField<16, 3, AlphaModifier> alpha_modifier2;
|
|
BitField<20, 3, AlphaModifier> alpha_modifier3;
|
|
};
|
|
|
|
union {
|
|
BitField< 0, 4, Operation> color_op;
|
|
BitField<16, 4, Operation> alpha_op;
|
|
};
|
|
|
|
union {
|
|
BitField< 0, 8, u32> const_r;
|
|
BitField< 8, 8, u32> const_g;
|
|
BitField<16, 8, u32> const_b;
|
|
BitField<24, 8, u32> const_a;
|
|
};
|
|
|
|
union {
|
|
BitField< 0, 2, u32> color_scale;
|
|
BitField<16, 2, u32> alpha_scale;
|
|
};
|
|
|
|
inline unsigned GetColorMultiplier() const {
|
|
return (color_scale < 3) ? (1 << color_scale) : 1;
|
|
}
|
|
|
|
inline unsigned GetAlphaMultiplier() const {
|
|
return (alpha_scale < 3) ? (1 << alpha_scale) : 1;
|
|
}
|
|
};
|
|
|
|
TevStageConfig tev_stage0;
|
|
INSERT_PADDING_WORDS(0x3);
|
|
TevStageConfig tev_stage1;
|
|
INSERT_PADDING_WORDS(0x3);
|
|
TevStageConfig tev_stage2;
|
|
INSERT_PADDING_WORDS(0x3);
|
|
TevStageConfig tev_stage3;
|
|
INSERT_PADDING_WORDS(0x3);
|
|
|
|
union {
|
|
// Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
|
|
// these masks are set
|
|
BitField< 8, 4, u32> update_mask_rgb;
|
|
BitField<12, 4, u32> update_mask_a;
|
|
|
|
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
|
|
return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
|
|
}
|
|
|
|
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
|
|
return (stage_index < 4) && (update_mask_a & (1 << stage_index));
|
|
}
|
|
} tev_combiner_buffer_input;
|
|
|
|
INSERT_PADDING_WORDS(0xf);
|
|
TevStageConfig tev_stage4;
|
|
INSERT_PADDING_WORDS(0x3);
|
|
TevStageConfig tev_stage5;
|
|
|
|
union {
|
|
BitField< 0, 8, u32> r;
|
|
BitField< 8, 8, u32> g;
|
|
BitField<16, 8, u32> b;
|
|
BitField<24, 8, u32> a;
|
|
} tev_combiner_buffer_color;
|
|
|
|
INSERT_PADDING_WORDS(0x2);
|
|
|
|
const std::array<Regs::TevStageConfig,6> GetTevStages() const {
|
|
return {{ tev_stage0, tev_stage1,
|
|
tev_stage2, tev_stage3,
|
|
tev_stage4, tev_stage5 }};
|
|
};
|
|
|
|
enum class BlendEquation : u32 {
|
|
Add = 0,
|
|
Subtract = 1,
|
|
ReverseSubtract = 2,
|
|
Min = 3,
|
|
Max = 4,
|
|
};
|
|
|
|
enum class BlendFactor : u32 {
|
|
Zero = 0,
|
|
One = 1,
|
|
SourceColor = 2,
|
|
OneMinusSourceColor = 3,
|
|
DestColor = 4,
|
|
OneMinusDestColor = 5,
|
|
SourceAlpha = 6,
|
|
OneMinusSourceAlpha = 7,
|
|
DestAlpha = 8,
|
|
OneMinusDestAlpha = 9,
|
|
ConstantColor = 10,
|
|
OneMinusConstantColor = 11,
|
|
ConstantAlpha = 12,
|
|
OneMinusConstantAlpha = 13,
|
|
SourceAlphaSaturate = 14,
|
|
};
|
|
|
|
enum class CompareFunc : u32 {
|
|
Never = 0,
|
|
Always = 1,
|
|
Equal = 2,
|
|
NotEqual = 3,
|
|
LessThan = 4,
|
|
LessThanOrEqual = 5,
|
|
GreaterThan = 6,
|
|
GreaterThanOrEqual = 7,
|
|
};
|
|
|
|
enum class StencilAction : u32 {
|
|
Keep = 0,
|
|
Xor = 5,
|
|
};
|
|
|
|
struct {
|
|
union {
|
|
// If false, logic blending is used
|
|
BitField<8, 1, u32> alphablend_enable;
|
|
};
|
|
|
|
union {
|
|
BitField< 0, 8, BlendEquation> blend_equation_rgb;
|
|
BitField< 8, 8, BlendEquation> blend_equation_a;
|
|
|
|
BitField<16, 4, BlendFactor> factor_source_rgb;
|
|
BitField<20, 4, BlendFactor> factor_dest_rgb;
|
|
|
|
BitField<24, 4, BlendFactor> factor_source_a;
|
|
BitField<28, 4, BlendFactor> factor_dest_a;
|
|
} alpha_blending;
|
|
|
|
union {
|
|
BitField<0, 4, LogicOp> logic_op;
|
|
};
|
|
|
|
union {
|
|
BitField< 0, 8, u32> r;
|
|
BitField< 8, 8, u32> g;
|
|
BitField<16, 8, u32> b;
|
|
BitField<24, 8, u32> a;
|
|
} blend_const;
|
|
|
|
union {
|
|
BitField< 0, 1, u32> enable;
|
|
BitField< 4, 3, CompareFunc> func;
|
|
BitField< 8, 8, u32> ref;
|
|
} alpha_test;
|
|
|
|
struct {
|
|
union {
|
|
// If true, enable stencil testing
|
|
BitField< 0, 1, u32> enable;
|
|
|
|
// Comparison operation for stencil testing
|
|
BitField< 4, 3, CompareFunc> func;
|
|
|
|
// Value to calculate the new stencil value from
|
|
BitField< 8, 8, u32> replacement_value;
|
|
|
|
// Value to compare against for stencil testing
|
|
BitField<16, 8, u32> reference_value;
|
|
|
|
// Mask to apply on stencil test inputs
|
|
BitField<24, 8, u32> mask;
|
|
};
|
|
|
|
union {
|
|
// Action to perform when the stencil test fails
|
|
BitField< 0, 3, StencilAction> action_stencil_fail;
|
|
|
|
// Action to perform when stencil testing passed but depth testing fails
|
|
BitField< 4, 3, StencilAction> action_depth_fail;
|
|
|
|
// Action to perform when both stencil and depth testing pass
|
|
BitField< 8, 3, StencilAction> action_depth_pass;
|
|
};
|
|
} stencil_test;
|
|
|
|
union {
|
|
BitField< 0, 1, u32> depth_test_enable;
|
|
BitField< 4, 3, CompareFunc> depth_test_func;
|
|
BitField< 8, 1, u32> red_enable;
|
|
BitField< 9, 1, u32> green_enable;
|
|
BitField<10, 1, u32> blue_enable;
|
|
BitField<11, 1, u32> alpha_enable;
|
|
BitField<12, 1, u32> depth_write_enable;
|
|
};
|
|
|
|
INSERT_PADDING_WORDS(0x8);
|
|
} output_merger;
|
|
|
|
// Components are laid out in reverse byte order, most significant bits first.
|
|
enum class ColorFormat : u32 {
|
|
RGBA8 = 0,
|
|
RGB8 = 1,
|
|
RGB5A1 = 2,
|
|
RGB565 = 3,
|
|
RGBA4 = 4,
|
|
};
|
|
|
|
enum class DepthFormat : u32 {
|
|
D16 = 0,
|
|
D24 = 2,
|
|
D24S8 = 3,
|
|
};
|
|
|
|
// Returns the number of bytes in the specified color format
|
|
static unsigned BytesPerColorPixel(ColorFormat format) {
|
|
switch (format) {
|
|
case ColorFormat::RGBA8:
|
|
return 4;
|
|
case ColorFormat::RGB8:
|
|
return 3;
|
|
case ColorFormat::RGB5A1:
|
|
case ColorFormat::RGB565:
|
|
case ColorFormat::RGBA4:
|
|
return 2;
|
|
default:
|
|
LOG_CRITICAL(HW_GPU, "Unknown color format %u", format);
|
|
UNIMPLEMENTED();
|
|
}
|
|
}
|
|
|
|
struct {
|
|
INSERT_PADDING_WORDS(0x6);
|
|
|
|
DepthFormat depth_format; // TODO: Should be a BitField!
|
|
BitField<16, 3, ColorFormat> color_format;
|
|
|
|
INSERT_PADDING_WORDS(0x4);
|
|
|
|
u32 depth_buffer_address;
|
|
u32 color_buffer_address;
|
|
|
|
union {
|
|
// Apparently, the framebuffer width is stored as expected,
|
|
// while the height is stored as the actual height minus one.
|
|
// Hence, don't access these fields directly but use the accessors
|
|
// GetWidth() and GetHeight() instead.
|
|
BitField< 0, 11, u32> width;
|
|
BitField<12, 10, u32> height;
|
|
};
|
|
|
|
INSERT_PADDING_WORDS(0x1);
|
|
|
|
inline u32 GetColorBufferPhysicalAddress() const {
|
|
return DecodeAddressRegister(color_buffer_address);
|
|
}
|
|
inline u32 GetDepthBufferPhysicalAddress() const {
|
|
return DecodeAddressRegister(depth_buffer_address);
|
|
}
|
|
|
|
inline u32 GetWidth() const {
|
|
return width;
|
|
}
|
|
|
|
inline u32 GetHeight() const {
|
|
return height + 1;
|
|
}
|
|
} framebuffer;
|
|
|
|
// Returns the number of bytes in the specified depth format
|
|
static u32 BytesPerDepthPixel(DepthFormat format) {
|
|
switch (format) {
|
|
case DepthFormat::D16:
|
|
return 2;
|
|
case DepthFormat::D24:
|
|
return 3;
|
|
case DepthFormat::D24S8:
|
|
return 4;
|
|
default:
|
|
LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
|
|
UNIMPLEMENTED();
|
|
}
|
|
}
|
|
|
|
// Returns the number of bits per depth component of the specified depth format
|
|
static u32 DepthBitsPerPixel(DepthFormat format) {
|
|
switch (format) {
|
|
case DepthFormat::D16:
|
|
return 16;
|
|
case DepthFormat::D24:
|
|
case DepthFormat::D24S8:
|
|
return 24;
|
|
default:
|
|
LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
|
|
UNIMPLEMENTED();
|
|
}
|
|
}
|
|
|
|
INSERT_PADDING_WORDS(0xe0);
|
|
|
|
enum class VertexAttributeFormat : u64 {
|
|
BYTE = 0,
|
|
UBYTE = 1,
|
|
SHORT = 2,
|
|
FLOAT = 3,
|
|
};
|
|
|
|
struct {
|
|
BitField<0, 29, u32> base_address;
|
|
|
|
u32 GetPhysicalBaseAddress() const {
|
|
return DecodeAddressRegister(base_address);
|
|
}
|
|
|
|
// Descriptor for internal vertex attributes
|
|
union {
|
|
BitField< 0, 2, VertexAttributeFormat> format0; // size of one element
|
|
BitField< 2, 2, u64> size0; // number of elements minus 1
|
|
BitField< 4, 2, VertexAttributeFormat> format1;
|
|
BitField< 6, 2, u64> size1;
|
|
BitField< 8, 2, VertexAttributeFormat> format2;
|
|
BitField<10, 2, u64> size2;
|
|
BitField<12, 2, VertexAttributeFormat> format3;
|
|
BitField<14, 2, u64> size3;
|
|
BitField<16, 2, VertexAttributeFormat> format4;
|
|
BitField<18, 2, u64> size4;
|
|
BitField<20, 2, VertexAttributeFormat> format5;
|
|
BitField<22, 2, u64> size5;
|
|
BitField<24, 2, VertexAttributeFormat> format6;
|
|
BitField<26, 2, u64> size6;
|
|
BitField<28, 2, VertexAttributeFormat> format7;
|
|
BitField<30, 2, u64> size7;
|
|
BitField<32, 2, VertexAttributeFormat> format8;
|
|
BitField<34, 2, u64> size8;
|
|
BitField<36, 2, VertexAttributeFormat> format9;
|
|
BitField<38, 2, u64> size9;
|
|
BitField<40, 2, VertexAttributeFormat> format10;
|
|
BitField<42, 2, u64> size10;
|
|
BitField<44, 2, VertexAttributeFormat> format11;
|
|
BitField<46, 2, u64> size11;
|
|
|
|
BitField<48, 12, u64> attribute_mask;
|
|
|
|
// number of total attributes minus 1
|
|
BitField<60, 4, u64> num_extra_attributes;
|
|
};
|
|
|
|
inline VertexAttributeFormat GetFormat(int n) const {
|
|
VertexAttributeFormat formats[] = {
|
|
format0, format1, format2, format3,
|
|
format4, format5, format6, format7,
|
|
format8, format9, format10, format11
|
|
};
|
|
return formats[n];
|
|
}
|
|
|
|
inline int GetNumElements(int n) const {
|
|
u64 sizes[] = {
|
|
size0, size1, size2, size3,
|
|
size4, size5, size6, size7,
|
|
size8, size9, size10, size11
|
|
};
|
|
return (int)sizes[n]+1;
|
|
}
|
|
|
|
inline int GetElementSizeInBytes(int n) const {
|
|
return (GetFormat(n) == VertexAttributeFormat::FLOAT) ? 4 :
|
|
(GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1;
|
|
}
|
|
|
|
inline int GetStride(int n) const {
|
|
return GetNumElements(n) * GetElementSizeInBytes(n);
|
|
}
|
|
|
|
inline bool IsDefaultAttribute(int id) const {
|
|
return (id >= 12) || (attribute_mask & (1ULL << id)) != 0;
|
|
}
|
|
|
|
inline int GetNumTotalAttributes() const {
|
|
return (int)num_extra_attributes+1;
|
|
}
|
|
|
|
// Attribute loaders map the source vertex data to input attributes
|
|
// This e.g. allows to load different attributes from different memory locations
|
|
struct {
|
|
// Source attribute data offset from the base address
|
|
u32 data_offset;
|
|
|
|
union {
|
|
BitField< 0, 4, u64> comp0;
|
|
BitField< 4, 4, u64> comp1;
|
|
BitField< 8, 4, u64> comp2;
|
|
BitField<12, 4, u64> comp3;
|
|
BitField<16, 4, u64> comp4;
|
|
BitField<20, 4, u64> comp5;
|
|
BitField<24, 4, u64> comp6;
|
|
BitField<28, 4, u64> comp7;
|
|
BitField<32, 4, u64> comp8;
|
|
BitField<36, 4, u64> comp9;
|
|
BitField<40, 4, u64> comp10;
|
|
BitField<44, 4, u64> comp11;
|
|
|
|
// bytes for a single vertex in this loader
|
|
BitField<48, 8, u64> byte_count;
|
|
|
|
BitField<60, 4, u64> component_count;
|
|
};
|
|
|
|
inline int GetComponent(int n) const {
|
|
u64 components[] = {
|
|
comp0, comp1, comp2, comp3,
|
|
comp4, comp5, comp6, comp7,
|
|
comp8, comp9, comp10, comp11
|
|
};
|
|
return (int)components[n];
|
|
}
|
|
} attribute_loaders[12];
|
|
} vertex_attributes;
|
|
|
|
struct {
|
|
enum IndexFormat : u32 {
|
|
BYTE = 0,
|
|
SHORT = 1,
|
|
};
|
|
|
|
union {
|
|
BitField<0, 31, u32> offset; // relative to base attribute address
|
|
BitField<31, 1, IndexFormat> format;
|
|
};
|
|
} index_array;
|
|
|
|
// Number of vertices to render
|
|
u32 num_vertices;
|
|
|
|
INSERT_PADDING_WORDS(0x5);
|
|
|
|
// These two trigger rendering of triangles
|
|
u32 trigger_draw;
|
|
u32 trigger_draw_indexed;
|
|
|
|
INSERT_PADDING_WORDS(0x2);
|
|
|
|
// These registers are used to setup the default "fall-back" vertex shader attributes
|
|
struct {
|
|
// Index of the current default attribute
|
|
u32 index;
|
|
|
|
// Writing to these registers sets the "current" default attribute.
|
|
u32 set_value[3];
|
|
} vs_default_attributes_setup;
|
|
|
|
INSERT_PADDING_WORDS(0x2);
|
|
|
|
struct {
|
|
// There are two channels that can be used to configure the next command buffer, which
|
|
// can be then executed by writing to the "trigger" registers. There are two reasons why a
|
|
// game might use this feature:
|
|
// 1) With this, an arbitrary number of additional command buffers may be executed in
|
|
// sequence without requiring any intervention of the CPU after the initial one is
|
|
// kicked off.
|
|
// 2) Games can configure these registers to provide a command list subroutine mechanism.
|
|
|
|
BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer
|
|
BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer
|
|
u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to
|
|
|
|
unsigned GetSize(unsigned index) const {
|
|
ASSERT(index < 2);
|
|
return 8 * size[index];
|
|
}
|
|
|
|
PAddr GetPhysicalAddress(unsigned index) const {
|
|
ASSERT(index < 2);
|
|
return (PAddr)(8 * addr[index]);
|
|
}
|
|
} command_buffer;
|
|
|
|
INSERT_PADDING_WORDS(0x20);
|
|
|
|
enum class TriangleTopology : u32 {
|
|
List = 0,
|
|
Strip = 1,
|
|
Fan = 2,
|
|
Shader = 3, // Programmable setup unit implemented in a geometry shader
|
|
};
|
|
|
|
BitField<8, 2, TriangleTopology> triangle_topology;
|
|
|
|
INSERT_PADDING_WORDS(0x21);
|
|
|
|
struct ShaderConfig {
|
|
BitField<0, 16, u32> bool_uniforms;
|
|
|
|
union {
|
|
BitField< 0, 8, u32> x;
|
|
BitField< 8, 8, u32> y;
|
|
BitField<16, 8, u32> z;
|
|
BitField<24, 8, u32> w;
|
|
} int_uniforms[4];
|
|
|
|
INSERT_PADDING_WORDS(0x5);
|
|
|
|
// Offset to shader program entry point (in words)
|
|
BitField<0, 16, u32> main_offset;
|
|
|
|
union {
|
|
BitField< 0, 4, u64> attribute0_register;
|
|
BitField< 4, 4, u64> attribute1_register;
|
|
BitField< 8, 4, u64> attribute2_register;
|
|
BitField<12, 4, u64> attribute3_register;
|
|
BitField<16, 4, u64> attribute4_register;
|
|
BitField<20, 4, u64> attribute5_register;
|
|
BitField<24, 4, u64> attribute6_register;
|
|
BitField<28, 4, u64> attribute7_register;
|
|
BitField<32, 4, u64> attribute8_register;
|
|
BitField<36, 4, u64> attribute9_register;
|
|
BitField<40, 4, u64> attribute10_register;
|
|
BitField<44, 4, u64> attribute11_register;
|
|
BitField<48, 4, u64> attribute12_register;
|
|
BitField<52, 4, u64> attribute13_register;
|
|
BitField<56, 4, u64> attribute14_register;
|
|
BitField<60, 4, u64> attribute15_register;
|
|
|
|
int GetRegisterForAttribute(int attribute_index) const {
|
|
u64 fields[] = {
|
|
attribute0_register, attribute1_register, attribute2_register, attribute3_register,
|
|
attribute4_register, attribute5_register, attribute6_register, attribute7_register,
|
|
attribute8_register, attribute9_register, attribute10_register, attribute11_register,
|
|
attribute12_register, attribute13_register, attribute14_register, attribute15_register,
|
|
};
|
|
return (int)fields[attribute_index];
|
|
}
|
|
} input_register_map;
|
|
|
|
// OUTMAP_MASK, 0x28E, CODETRANSFER_END
|
|
INSERT_PADDING_WORDS(0x3);
|
|
|
|
struct {
|
|
enum Format : u32
|
|
{
|
|
FLOAT24 = 0,
|
|
FLOAT32 = 1
|
|
};
|
|
|
|
bool IsFloat32() const {
|
|
return format == FLOAT32;
|
|
}
|
|
|
|
union {
|
|
// Index of the next uniform to write to
|
|
// TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
|
|
// TODO: Maybe the uppermost index is for the geometry shader? Investigate!
|
|
BitField<0, 7, u32> index;
|
|
|
|
BitField<31, 1, Format> format;
|
|
};
|
|
|
|
// Writing to these registers sets the current uniform.
|
|
u32 set_value[8];
|
|
|
|
} uniform_setup;
|
|
|
|
INSERT_PADDING_WORDS(0x2);
|
|
|
|
struct {
|
|
// Offset of the next instruction to write code to.
|
|
// Incremented with each instruction write.
|
|
u32 offset;
|
|
|
|
// Writing to these registers sets the "current" word in the shader program.
|
|
u32 set_word[8];
|
|
} program;
|
|
|
|
INSERT_PADDING_WORDS(0x1);
|
|
|
|
// This register group is used to load an internal table of swizzling patterns,
|
|
// which are indexed by each shader instruction to specify vector component swizzling.
|
|
struct {
|
|
// Offset of the next swizzle pattern to write code to.
|
|
// Incremented with each instruction write.
|
|
u32 offset;
|
|
|
|
// Writing to these registers sets the current swizzle pattern in the table.
|
|
u32 set_word[8];
|
|
} swizzle_patterns;
|
|
|
|
INSERT_PADDING_WORDS(0x2);
|
|
};
|
|
|
|
ShaderConfig gs;
|
|
ShaderConfig vs;
|
|
|
|
INSERT_PADDING_WORDS(0x20);
|
|
|
|
// Map register indices to names readable by humans
|
|
// Used for debugging purposes, so performance is not an issue here
|
|
static std::string GetCommandName(int index);
|
|
|
|
static inline size_t NumIds() {
|
|
return sizeof(Regs) / sizeof(u32);
|
|
}
|
|
|
|
u32& operator [] (int index) const {
|
|
u32* content = (u32*)this;
|
|
return content[index];
|
|
}
|
|
|
|
u32& operator [] (int index) {
|
|
u32* content = (u32*)this;
|
|
return content[index];
|
|
}
|
|
|
|
private:
|
|
/*
|
|
* Most physical addresses which Pica registers refer to are 8-byte aligned.
|
|
* This function should be used to get the address from a raw register value.
|
|
*/
|
|
static inline u32 DecodeAddressRegister(u32 register_value) {
|
|
return register_value * 8;
|
|
}
|
|
};
|
|
|
|
// TODO: MSVC does not support using offsetof() on non-static data members even though this
|
|
// is technically allowed since C++11. This macro should be enabled once MSVC adds
|
|
// support for that.
|
|
#ifndef _MSC_VER
|
|
#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position")
|
|
|
|
ASSERT_REG_POSITION(trigger_irq, 0x10);
|
|
ASSERT_REG_POSITION(cull_mode, 0x40);
|
|
ASSERT_REG_POSITION(viewport_size_x, 0x41);
|
|
ASSERT_REG_POSITION(viewport_size_y, 0x43);
|
|
ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
|
|
ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e);
|
|
ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
|
|
ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
|
|
ASSERT_REG_POSITION(viewport_corner, 0x68);
|
|
ASSERT_REG_POSITION(texture0_enable, 0x80);
|
|
ASSERT_REG_POSITION(texture0, 0x81);
|
|
ASSERT_REG_POSITION(texture0_format, 0x8e);
|
|
ASSERT_REG_POSITION(texture1, 0x91);
|
|
ASSERT_REG_POSITION(texture1_format, 0x96);
|
|
ASSERT_REG_POSITION(texture2, 0x99);
|
|
ASSERT_REG_POSITION(texture2_format, 0x9e);
|
|
ASSERT_REG_POSITION(tev_stage0, 0xc0);
|
|
ASSERT_REG_POSITION(tev_stage1, 0xc8);
|
|
ASSERT_REG_POSITION(tev_stage2, 0xd0);
|
|
ASSERT_REG_POSITION(tev_stage3, 0xd8);
|
|
ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);
|
|
ASSERT_REG_POSITION(tev_stage4, 0xf0);
|
|
ASSERT_REG_POSITION(tev_stage5, 0xf8);
|
|
ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
|
|
ASSERT_REG_POSITION(output_merger, 0x100);
|
|
ASSERT_REG_POSITION(framebuffer, 0x110);
|
|
ASSERT_REG_POSITION(vertex_attributes, 0x200);
|
|
ASSERT_REG_POSITION(index_array, 0x227);
|
|
ASSERT_REG_POSITION(num_vertices, 0x228);
|
|
ASSERT_REG_POSITION(trigger_draw, 0x22e);
|
|
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
|
|
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
|
|
ASSERT_REG_POSITION(command_buffer, 0x238);
|
|
ASSERT_REG_POSITION(triangle_topology, 0x25e);
|
|
ASSERT_REG_POSITION(gs, 0x280);
|
|
ASSERT_REG_POSITION(vs, 0x2b0);
|
|
|
|
#undef ASSERT_REG_POSITION
|
|
#endif // !defined(_MSC_VER)
|
|
|
|
static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size");
|
|
|
|
// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
|
|
static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
|
|
static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
|
|
|
|
struct float24 {
|
|
static float24 FromFloat32(float val) {
|
|
float24 ret;
|
|
ret.value = val;
|
|
return ret;
|
|
}
|
|
|
|
// 16 bit mantissa, 7 bit exponent, 1 bit sign
|
|
// TODO: No idea if this works as intended
|
|
static float24 FromRawFloat24(u32 hex) {
|
|
float24 ret;
|
|
if ((hex & 0xFFFFFF) == 0) {
|
|
ret.value = 0;
|
|
} else {
|
|
u32 mantissa = hex & 0xFFFF;
|
|
u32 exponent = (hex >> 16) & 0x7F;
|
|
u32 sign = hex >> 23;
|
|
ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f));
|
|
if (sign)
|
|
ret.value = -ret.value;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static float24 Zero() {
|
|
return FromFloat32(0.f);
|
|
}
|
|
|
|
// Not recommended for anything but logging
|
|
float ToFloat32() const {
|
|
return value;
|
|
}
|
|
|
|
float24 operator * (const float24& flt) const {
|
|
if ((this->value == 0.f && !std::isnan(flt.value)) ||
|
|
(flt.value == 0.f && !std::isnan(this->value)))
|
|
// PICA gives 0 instead of NaN when multiplying by inf
|
|
return Zero();
|
|
return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
|
|
}
|
|
|
|
float24 operator / (const float24& flt) const {
|
|
return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
|
|
}
|
|
|
|
float24 operator + (const float24& flt) const {
|
|
return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
|
|
}
|
|
|
|
float24 operator - (const float24& flt) const {
|
|
return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
|
|
}
|
|
|
|
float24& operator *= (const float24& flt) {
|
|
if ((this->value == 0.f && !std::isnan(flt.value)) ||
|
|
(flt.value == 0.f && !std::isnan(this->value)))
|
|
// PICA gives 0 instead of NaN when multiplying by inf
|
|
*this = Zero();
|
|
else value *= flt.ToFloat32();
|
|
return *this;
|
|
}
|
|
|
|
float24& operator /= (const float24& flt) {
|
|
value /= flt.ToFloat32();
|
|
return *this;
|
|
}
|
|
|
|
float24& operator += (const float24& flt) {
|
|
value += flt.ToFloat32();
|
|
return *this;
|
|
}
|
|
|
|
float24& operator -= (const float24& flt) {
|
|
value -= flt.ToFloat32();
|
|
return *this;
|
|
}
|
|
|
|
float24 operator - () const {
|
|
return float24::FromFloat32(-ToFloat32());
|
|
}
|
|
|
|
bool operator < (const float24& flt) const {
|
|
return ToFloat32() < flt.ToFloat32();
|
|
}
|
|
|
|
bool operator > (const float24& flt) const {
|
|
return ToFloat32() > flt.ToFloat32();
|
|
}
|
|
|
|
bool operator >= (const float24& flt) const {
|
|
return ToFloat32() >= flt.ToFloat32();
|
|
}
|
|
|
|
bool operator <= (const float24& flt) const {
|
|
return ToFloat32() <= flt.ToFloat32();
|
|
}
|
|
|
|
bool operator == (const float24& flt) const {
|
|
return ToFloat32() == flt.ToFloat32();
|
|
}
|
|
|
|
bool operator != (const float24& flt) const {
|
|
return ToFloat32() != flt.ToFloat32();
|
|
}
|
|
|
|
private:
|
|
// Stored as a regular float, merely for convenience
|
|
// TODO: Perform proper arithmetic on this!
|
|
float value;
|
|
};
|
|
static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
|
|
|
|
/// Struct used to describe current Pica state
|
|
struct State {
|
|
/// Pica registers
|
|
Regs regs;
|
|
|
|
/// Vertex shader memory
|
|
struct ShaderSetup {
|
|
struct {
|
|
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
|
// therefore required to be 16-byte aligned.
|
|
Math::Vec4<float24> MEMORY_ALIGNED16(f[96]);
|
|
|
|
std::array<bool, 16> b;
|
|
std::array<Math::Vec4<u8>, 4> i;
|
|
} uniforms;
|
|
|
|
Math::Vec4<float24> default_attributes[16];
|
|
|
|
std::array<u32, 1024> program_code;
|
|
std::array<u32, 1024> swizzle_data;
|
|
};
|
|
|
|
ShaderSetup vs;
|
|
ShaderSetup gs;
|
|
|
|
/// Current Pica command list
|
|
struct {
|
|
const u32* head_ptr;
|
|
const u32* current_ptr;
|
|
u32 length;
|
|
} cmd_list;
|
|
};
|
|
|
|
/// Initialize Pica state
|
|
void Init();
|
|
|
|
/// Shutdown Pica state
|
|
void Shutdown();
|
|
|
|
extern State g_state; ///< Current Pica state
|
|
|
|
} // namespace
|