forked from suyu/suyu
decoders: correct block calculation
This commit is contained in:
parent
3dd7643214
commit
7232a1ed16
7 changed files with 41 additions and 29 deletions
|
@ -39,7 +39,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
|
||||||
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0);
|
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0);
|
||||||
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0);
|
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0);
|
||||||
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||||
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
|
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0);
|
||||||
tmp_buffer.resize(dst_size);
|
tmp_buffer.resize(dst_size);
|
||||||
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
||||||
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
|
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
|
||||||
|
|
|
@ -39,15 +39,15 @@ struct Registers {
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 BlockWidth() const {
|
u32 BlockWidth() const {
|
||||||
return block_width;
|
return block_width.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 BlockHeight() const {
|
u32 BlockHeight() const {
|
||||||
return block_height;
|
return block_height.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 BlockDepth() const {
|
u32 BlockDepth() const {
|
||||||
return block_depth;
|
return block_depth.Value();
|
||||||
}
|
}
|
||||||
} dest;
|
} dest;
|
||||||
};
|
};
|
||||||
|
|
|
@ -84,15 +84,15 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 BlockWidth() const {
|
u32 BlockWidth() const {
|
||||||
return block_width;
|
return block_width.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 BlockHeight() const {
|
u32 BlockHeight() const {
|
||||||
return block_height;
|
return block_height.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 BlockDepth() const {
|
u32 BlockDepth() const {
|
||||||
return block_depth;
|
return block_depth.Value();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
|
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
|
||||||
|
|
|
@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() {
|
||||||
|
|
||||||
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
|
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
|
||||||
} else {
|
} else {
|
||||||
ASSERT(regs.dst_params.BlockDepth() == 0);
|
ASSERT(regs.dst_params.BlockDepth() == 1);
|
||||||
|
|
||||||
const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
|
const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
|
||||||
|
|
||||||
|
|
|
@ -59,11 +59,11 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
u32 BlockHeight() const {
|
u32 BlockHeight() const {
|
||||||
return block_height;
|
return block_height.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 BlockDepth() const {
|
u32 BlockDepth() const {
|
||||||
return block_depth;
|
return block_depth.Value();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -335,6 +335,9 @@ private:
|
||||||
if (untopological == MatchTopologyResult::CompressUnmatch) {
|
if (untopological == MatchTopologyResult::CompressUnmatch) {
|
||||||
return RecycleStrategy::Flush;
|
return RecycleStrategy::Flush;
|
||||||
}
|
}
|
||||||
|
if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
|
||||||
|
return RecycleStrategy::Flush;
|
||||||
|
}
|
||||||
return RecycleStrategy::Ignore;
|
return RecycleStrategy::Ignore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -372,6 +375,11 @@ private:
|
||||||
}
|
}
|
||||||
return InitializeSurface(gpu_addr, params, preserve_contents);
|
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||||
}
|
}
|
||||||
|
case RecycleStrategy::BufferCopy: {
|
||||||
|
auto new_surface = GetUncachedSurface(gpu_addr, params);
|
||||||
|
BufferCopy(overlaps[0], new_surface);
|
||||||
|
return {new_surface, new_surface->GetMainView()};
|
||||||
|
}
|
||||||
default: {
|
default: {
|
||||||
UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
|
UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
|
||||||
return InitializeSurface(gpu_addr, params, do_load);
|
return InitializeSurface(gpu_addr, params, do_load);
|
||||||
|
@ -520,6 +528,10 @@ private:
|
||||||
const auto host_ptr{memory_manager->GetPointer(gpu_addr)};
|
const auto host_ptr{memory_manager->GetPointer(gpu_addr)};
|
||||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||||
|
|
||||||
|
if (gpu_addr == 0x00000001682F0000ULL) {
|
||||||
|
LOG_CRITICAL(HW_GPU, "Here's the texture!");
|
||||||
|
}
|
||||||
|
|
||||||
// Step 0: guarantee a valid surface
|
// Step 0: guarantee a valid surface
|
||||||
if (!cache_addr) {
|
if (!cache_addr) {
|
||||||
// Return a null surface if it's invalid
|
// Return a null surface if it's invalid
|
||||||
|
@ -566,6 +578,10 @@ private:
|
||||||
return InitializeSurface(gpu_addr, params, preserve_contents);
|
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!params.is_tiled) {
|
||||||
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||||
|
MatchTopologyResult::FullMatch);
|
||||||
|
}
|
||||||
// Step 3
|
// Step 3
|
||||||
// Now we need to figure the relationship between the texture and its overlaps
|
// Now we need to figure the relationship between the texture and its overlaps
|
||||||
// we do a topological test to ensure we can find some relationship. If it fails
|
// we do a topological test to ensure we can find some relationship. If it fails
|
||||||
|
|
|
@ -256,19 +256,18 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
|
||||||
}
|
}
|
||||||
|
|
||||||
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
|
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
|
||||||
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
|
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit) {
|
||||||
const u32 block_height_size{1U << block_height};
|
const u32 block_height = 1U << block_height_bit;
|
||||||
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
|
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
|
||||||
gob_size_x};
|
gob_size_x};
|
||||||
for (u32 line = 0; line < subrect_height; ++line) {
|
for (u32 line = 0; line < subrect_height; ++line) {
|
||||||
const u32 gob_address_y =
|
const u32 gob_address_y =
|
||||||
(line / (gob_size_y * block_height_size)) * gob_size * block_height_size *
|
(line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
|
||||||
image_width_in_gobs +
|
((line % (gob_size_y * block_height)) / gob_size_y) * gob_size;
|
||||||
((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size;
|
|
||||||
const auto& table = legacy_swizzle_table[line % gob_size_y];
|
const auto& table = legacy_swizzle_table[line % gob_size_y];
|
||||||
for (u32 x = 0; x < subrect_width; ++x) {
|
for (u32 x = 0; x < subrect_width; ++x) {
|
||||||
const u32 gob_address =
|
const u32 gob_address =
|
||||||
gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size;
|
gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
|
||||||
const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
|
const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
|
||||||
u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
|
u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
|
||||||
u8* dest_addr = swizzled_data + swizzled_offset;
|
u8* dest_addr = swizzled_data + swizzled_offset;
|
||||||
|
@ -279,19 +278,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
|
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
|
||||||
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
|
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit,
|
||||||
u32 offset_x, u32 offset_y) {
|
u32 offset_x, u32 offset_y) {
|
||||||
const u32 block_height_size{1U << block_height};
|
const u32 block_height = 1U << block_height_bit;
|
||||||
for (u32 line = 0; line < subrect_height; ++line) {
|
for (u32 line = 0; line < subrect_height; ++line) {
|
||||||
const u32 y2 = line + offset_y;
|
const u32 y2 = line + offset_y;
|
||||||
const u32 gob_address_y =
|
const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
|
||||||
(y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size +
|
((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size;
|
||||||
((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size;
|
|
||||||
const auto& table = legacy_swizzle_table[y2 % gob_size_y];
|
const auto& table = legacy_swizzle_table[y2 % gob_size_y];
|
||||||
for (u32 x = 0; x < subrect_width; ++x) {
|
for (u32 x = 0; x < subrect_width; ++x) {
|
||||||
const u32 x2 = (x + offset_x) * bytes_per_pixel;
|
const u32 x2 = (x + offset_x) * bytes_per_pixel;
|
||||||
const u32 gob_address =
|
const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
|
||||||
gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size;
|
|
||||||
const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
|
const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
|
||||||
u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
|
u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
|
||||||
u8* source_addr = swizzled_data + swizzled_offset;
|
u8* source_addr = swizzled_data + swizzled_offset;
|
||||||
|
@ -302,20 +299,19 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
|
||||||
}
|
}
|
||||||
|
|
||||||
void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
|
void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
|
||||||
const u32 block_height, const std::size_t copy_size, const u8* source_data,
|
const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
|
||||||
u8* swizzle_data) {
|
u8* swizzle_data) {
|
||||||
const u32 block_height_size{1U << block_height};
|
const u32 block_height = 1U << block_height_bit;
|
||||||
const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
|
const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
|
||||||
std::size_t count = 0;
|
std::size_t count = 0;
|
||||||
for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
|
for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
|
||||||
const std::size_t gob_address_y =
|
const std::size_t gob_address_y =
|
||||||
(y / (gob_size_y * block_height_size)) * gob_size * block_height_size *
|
(y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
|
||||||
image_width_in_gobs +
|
((y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
|
||||||
((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size;
|
|
||||||
const auto& table = legacy_swizzle_table[y % gob_size_y];
|
const auto& table = legacy_swizzle_table[y % gob_size_y];
|
||||||
for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
|
for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
|
||||||
const std::size_t gob_address =
|
const std::size_t gob_address =
|
||||||
gob_address_y + (x / gob_size_x) * gob_size * block_height_size;
|
gob_address_y + (x / gob_size_x) * gob_size * block_height;
|
||||||
const std::size_t swizzled_offset = gob_address + table[x % gob_size_x];
|
const std::size_t swizzled_offset = gob_address + table[x % gob_size_x];
|
||||||
const u8* source_line = source_data + count;
|
const u8* source_line = source_data + count;
|
||||||
u8* dest_addr = swizzle_data + swizzled_offset;
|
u8* dest_addr = swizzle_data + swizzled_offset;
|
||||||
|
|
Loading…
Reference in a new issue