1
0
Fork 0
forked from suyu/suyu

Merge pull request #10457 from Kelebek1/optimise

Remove memory allocations in some hot paths
This commit is contained in:
bunnei 2023-06-22 21:53:07 -07:00 committed by GitHub
commit 2fc5dedf69
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
84 changed files with 501 additions and 458 deletions

View file

@ -7,6 +7,7 @@
#include <mutex> #include <mutex>
#include <span> #include <span>
#include <vector> #include <vector>
#include <boost/container/static_vector.hpp>
#include "audio_buffer.h" #include "audio_buffer.h"
#include "audio_core/device/device_session.h" #include "audio_core/device/device_session.h"
@ -48,7 +49,7 @@ public:
* *
* @param out_buffers - The buffers which were registered. * @param out_buffers - The buffers which were registered.
*/ */
void RegisterBuffers(std::vector<AudioBuffer>& out_buffers) { void RegisterBuffers(boost::container::static_vector<AudioBuffer, N>& out_buffers) {
std::scoped_lock l{lock}; std::scoped_lock l{lock};
const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit), const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit),
BufferAppendLimit - registered_count)}; BufferAppendLimit - registered_count)};
@ -162,7 +163,8 @@ public:
* @param max_buffers - Maximum number of buffers to be released. * @param max_buffers - Maximum number of buffers to be released.
* @return The number of buffers released. * @return The number of buffers released.
*/ */
u32 GetRegisteredAppendedBuffers(std::vector<AudioBuffer>& buffers_flushed, u32 max_buffers) { u32 GetRegisteredAppendedBuffers(
boost::container::static_vector<AudioBuffer, N>& buffers_flushed, u32 max_buffers) {
std::scoped_lock l{lock}; std::scoped_lock l{lock};
if (registered_count + appended_count == 0) { if (registered_count + appended_count == 0) {
return 0; return 0;
@ -270,7 +272,7 @@ public:
*/ */
bool FlushBuffers(u32& buffers_released) { bool FlushBuffers(u32& buffers_released) {
std::scoped_lock l{lock}; std::scoped_lock l{lock};
std::vector<AudioBuffer> buffers_flushed{}; boost::container::static_vector<AudioBuffer, N> buffers_flushed{};
buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit); buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit);

View file

@ -79,7 +79,7 @@ void DeviceSession::ClearBuffers() {
} }
} }
void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const { void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) {
for (const auto& buffer : buffers) { for (const auto& buffer : buffers) {
Sink::SinkBuffer new_buffer{ Sink::SinkBuffer new_buffer{
.frames = buffer.size / (channel_count * sizeof(s16)), .frames = buffer.size / (channel_count * sizeof(s16)),
@ -88,13 +88,13 @@ void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const {
.consumed = false, .consumed = false,
}; };
tmp_samples.resize_destructive(buffer.size / sizeof(s16));
if (type == Sink::StreamType::In) { if (type == Sink::StreamType::In) {
std::vector<s16> samples{}; stream->AppendBuffer(new_buffer, tmp_samples);
stream->AppendBuffer(new_buffer, samples);
} else { } else {
std::vector<s16> samples(buffer.size / sizeof(s16)); system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(),
system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, samples.data(), buffer.size); buffer.size);
stream->AppendBuffer(new_buffer, samples); stream->AppendBuffer(new_buffer, tmp_samples);
} }
} }
} }

View file

@ -10,6 +10,7 @@
#include "audio_core/common/common.h" #include "audio_core/common/common.h"
#include "audio_core/sink/sink.h" #include "audio_core/sink/sink.h"
#include "common/scratch_buffer.h"
#include "core/hle/service/audio/errors.h" #include "core/hle/service/audio/errors.h"
namespace Core { namespace Core {
@ -62,7 +63,7 @@ public:
* *
* @param buffers - The buffers to play. * @param buffers - The buffers to play.
*/ */
void AppendBuffers(std::span<const AudioBuffer> buffers) const; void AppendBuffers(std::span<const AudioBuffer> buffers);
/** /**
* (Audio In only) Pop samples from the backend, and write them back to this buffer's address. * (Audio In only) Pop samples from the backend, and write them back to this buffer's address.
@ -146,8 +147,8 @@ private:
std::shared_ptr<Core::Timing::EventType> thread_event; std::shared_ptr<Core::Timing::EventType> thread_event;
/// Is this session initialised? /// Is this session initialised?
bool initialized{}; bool initialized{};
/// Buffer queue /// Temporary sample buffer
std::vector<AudioBuffer> buffer_queue{}; Common::ScratchBuffer<s16> tmp_samples{};
}; };
} // namespace AudioCore } // namespace AudioCore

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex> #include <mutex>
#include "audio_core/audio_event.h" #include "audio_core/audio_event.h"
#include "audio_core/audio_manager.h" #include "audio_core/audio_manager.h"
#include "audio_core/in/audio_in_system.h" #include "audio_core/in/audio_in_system.h"
@ -89,7 +90,7 @@ Result System::Start() {
session->Start(); session->Start();
state = State::Started; state = State::Started;
std::vector<AudioBuffer> buffers_to_flush{}; boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{};
buffers.RegisterBuffers(buffers_to_flush); buffers.RegisterBuffers(buffers_to_flush);
session->AppendBuffers(buffers_to_flush); session->AppendBuffers(buffers_to_flush);
session->SetRingSize(static_cast<u32>(buffers_to_flush.size())); session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
@ -134,7 +135,7 @@ bool System::AppendBuffer(const AudioInBuffer& buffer, const u64 tag) {
void System::RegisterBuffers() { void System::RegisterBuffers() {
if (state == State::Started) { if (state == State::Started) {
std::vector<AudioBuffer> registered_buffers{}; boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{};
buffers.RegisterBuffers(registered_buffers); buffers.RegisterBuffers(registered_buffers);
session->AppendBuffers(registered_buffers); session->AppendBuffers(registered_buffers);
} }

View file

@ -89,7 +89,7 @@ Result System::Start() {
session->Start(); session->Start();
state = State::Started; state = State::Started;
std::vector<AudioBuffer> buffers_to_flush{}; boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{};
buffers.RegisterBuffers(buffers_to_flush); buffers.RegisterBuffers(buffers_to_flush);
session->AppendBuffers(buffers_to_flush); session->AppendBuffers(buffers_to_flush);
session->SetRingSize(static_cast<u32>(buffers_to_flush.size())); session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
@ -134,7 +134,7 @@ bool System::AppendBuffer(const AudioOutBuffer& buffer, u64 tag) {
void System::RegisterBuffers() { void System::RegisterBuffers() {
if (state == State::Started) { if (state == State::Started) {
std::vector<AudioBuffer> registered_buffers{}; boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{};
buffers.RegisterBuffers(registered_buffers); buffers.RegisterBuffers(registered_buffers);
session->AppendBuffers(registered_buffers); session->AppendBuffers(registered_buffers);
} }

View file

@ -8,6 +8,7 @@
#include "audio_core/renderer/command/resample/resample.h" #include "audio_core/renderer/command/resample/resample.h"
#include "common/fixed_point.h" #include "common/fixed_point.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/scratch_buffer.h"
#include "core/memory.h" #include "core/memory.h"
namespace AudioCore::AudioRenderer { namespace AudioCore::AudioRenderer {
@ -27,6 +28,7 @@ constexpr std::array<u8, 3> PitchBySrcQuality = {4, 8, 4};
template <typename T> template <typename T>
static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const DecodeArg& req) { const DecodeArg& req) {
std::array<T, TempBufferSize> tmp_samples{};
constexpr s32 min{std::numeric_limits<s16>::min()}; constexpr s32 min{std::numeric_limits<s16>::min()};
constexpr s32 max{std::numeric_limits<s16>::max()}; constexpr s32 max{std::numeric_limits<s16>::max()};
@ -49,18 +51,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const u64 size{channel_count * samples_to_decode}; const u64 size{channel_count * samples_to_decode};
const u64 size_bytes{size * sizeof(T)}; const u64 size_bytes{size * sizeof(T)};
std::vector<T> samples(size); memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes);
memory.ReadBlockUnsafe(source, samples.data(), size_bytes);
if constexpr (std::is_floating_point_v<T>) { if constexpr (std::is_floating_point_v<T>) {
for (u32 i = 0; i < samples_to_decode; i++) { for (u32 i = 0; i < samples_to_decode; i++) {
auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
std::numeric_limits<s16>::max())}; std::numeric_limits<s16>::max())};
out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
} }
} else { } else {
for (u32 i = 0; i < samples_to_decode; i++) { for (u32 i = 0; i < samples_to_decode; i++) {
out_buffer[i] = samples[i * channel_count + req.target_channel]; out_buffer[i] = tmp_samples[i * channel_count + req.target_channel];
} }
} }
} break; } break;
@ -73,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
} }
const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))};
std::vector<T> samples(samples_to_decode); memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T));
memory.ReadBlockUnsafe(source, samples.data(), samples_to_decode * sizeof(T));
if constexpr (std::is_floating_point_v<T>) { if constexpr (std::is_floating_point_v<T>) {
for (u32 i = 0; i < samples_to_decode; i++) { for (u32 i = 0; i < samples_to_decode; i++) {
auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
std::numeric_limits<s16>::max())}; std::numeric_limits<s16>::max())};
out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
} }
} else { } else {
std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16));
} }
break; break;
} }
@ -101,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
*/ */
static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const DecodeArg& req) { const DecodeArg& req) {
std::array<u8, TempBufferSize> wavebuffer{};
constexpr u32 SamplesPerFrame{14}; constexpr u32 SamplesPerFrame{14};
constexpr u32 NibblesPerFrame{16}; constexpr u32 NibblesPerFrame{16};
@ -138,9 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
} }
const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)};
std::vector<u8> wavebuffer(size); memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size);
memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(),
wavebuffer.size());
auto context{req.adpcm_context}; auto context{req.adpcm_context};
auto header{context->header}; auto header{context->header};
@ -258,7 +257,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf
u32 offset{voice_state.offset}; u32 offset{voice_state.offset};
auto output_buffer{args.output}; auto output_buffer{args.output};
std::vector<s16> temp_buffer(TempBufferSize, 0); std::array<s16, TempBufferSize> temp_buffer{};
while (remaining_sample_count > 0) { while (remaining_sample_count > 0) {
const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)}; const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)};

View file

@ -44,8 +44,8 @@ static void InitializeCompressorEffect(const CompressorInfo::ParameterVersion2&
static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params, static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params,
CompressorInfo::State& state, bool enabled, CompressorInfo::State& state, bool enabled,
std::vector<std::span<const s32>> input_buffers, std::span<std::span<const s32>> input_buffers,
std::vector<std::span<s32>> output_buffers, u32 sample_count) { std::span<std::span<s32>> output_buffers, u32 sample_count) {
if (enabled) { if (enabled) {
auto state_00{state.unk_00}; auto state_00{state.unk_00};
auto state_04{state.unk_04}; auto state_04{state.unk_04};
@ -124,8 +124,8 @@ void CompressorCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor&
} }
void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) { void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) {
std::vector<std::span<const s32>> input_buffers(parameter.channel_count); std::array<std::span<const s32>, MaxChannels> input_buffers{};
std::vector<std::span<s32>> output_buffers(parameter.channel_count); std::array<std::span<s32>, MaxChannels> output_buffers{};
for (s16 i = 0; i < parameter.channel_count; i++) { for (s16 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,

View file

@ -51,7 +51,7 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params,
state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor(); state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor();
state.delay_lines[channel].sample_count = sample_count.to_int_floor(); state.delay_lines[channel].sample_count = sample_count.to_int_floor();
state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0); state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0);
if (state.delay_lines[channel].buffer.size() == 0) { if (state.delay_lines[channel].sample_count == 0) {
state.delay_lines[channel].buffer.push_back(0); state.delay_lines[channel].buffer.push_back(0);
} }
state.delay_lines[channel].buffer_pos = 0; state.delay_lines[channel].buffer_pos = 0;
@ -74,8 +74,8 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params,
*/ */
template <size_t NumChannels> template <size_t NumChannels>
static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state,
std::vector<std::span<const s32>>& inputs, std::span<std::span<const s32>> inputs, std::span<std::span<s32>> outputs,
std::vector<std::span<s32>>& outputs, const u32 sample_count) { const u32 sample_count) {
for (u32 sample_index = 0; sample_index < sample_count; sample_index++) { for (u32 sample_index = 0; sample_index < sample_count; sample_index++) {
std::array<Common::FixedPoint<50, 14>, NumChannels> input_samples{}; std::array<Common::FixedPoint<50, 14>, NumChannels> input_samples{};
for (u32 channel = 0; channel < NumChannels; channel++) { for (u32 channel = 0; channel < NumChannels; channel++) {
@ -153,8 +153,8 @@ static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::St
* @param sample_count - Number of samples to process. * @param sample_count - Number of samples to process.
*/ */
static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state,
const bool enabled, std::vector<std::span<const s32>>& inputs, const bool enabled, std::span<std::span<const s32>> inputs,
std::vector<std::span<s32>>& outputs, const u32 sample_count) { std::span<std::span<s32>> outputs, const u32 sample_count) {
if (!IsChannelCountValid(params.channel_count)) { if (!IsChannelCountValid(params.channel_count)) {
LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count); LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count);
@ -208,8 +208,8 @@ void DelayCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proce
} }
void DelayCommand::Process(const ADSP::CommandListProcessor& processor) { void DelayCommand::Process(const ADSP::CommandListProcessor& processor) {
std::vector<std::span<const s32>> input_buffers(parameter.channel_count); std::array<std::span<const s32>, MaxChannels> input_buffers{};
std::vector<std::span<s32>> output_buffers(parameter.channel_count); std::array<std::span<s32>, MaxChannels> output_buffers{};
for (s16 i = 0; i < parameter.channel_count; i++) { for (s16 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,

View file

@ -408,8 +408,8 @@ void I3dl2ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor&
} }
void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) {
std::vector<std::span<const s32>> input_buffers(parameter.channel_count); std::array<std::span<const s32>, MaxChannels> input_buffers{};
std::vector<std::span<s32>> output_buffers(parameter.channel_count); std::array<std::span<s32>, MaxChannels> output_buffers{};
for (u32 i = 0; i < parameter.channel_count; i++) { for (u32 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,

View file

@ -47,8 +47,8 @@ static void InitializeLightLimiterEffect(const LightLimiterInfo::ParameterVersio
*/ */
static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params, static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params,
LightLimiterInfo::State& state, const bool enabled, LightLimiterInfo::State& state, const bool enabled,
std::vector<std::span<const s32>>& inputs, std::span<std::span<const s32>> inputs,
std::vector<std::span<s32>>& outputs, const u32 sample_count, std::span<std::span<s32>> outputs, const u32 sample_count,
LightLimiterInfo::StatisticsInternal* statistics) { LightLimiterInfo::StatisticsInternal* statistics) {
constexpr s64 min{std::numeric_limits<s32>::min()}; constexpr s64 min{std::numeric_limits<s32>::min()};
constexpr s64 max{std::numeric_limits<s32>::max()}; constexpr s64 max{std::numeric_limits<s32>::max()};
@ -147,8 +147,8 @@ void LightLimiterVersion1Command::Dump([[maybe_unused]] const ADSP::CommandListP
} }
void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) { void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) {
std::vector<std::span<const s32>> input_buffers(parameter.channel_count); std::array<std::span<const s32>, MaxChannels> input_buffers{};
std::vector<std::span<s32>> output_buffers(parameter.channel_count); std::array<std::span<s32>, MaxChannels> output_buffers{};
for (u32 i = 0; i < parameter.channel_count; i++) { for (u32 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
@ -190,8 +190,8 @@ void LightLimiterVersion2Command::Dump([[maybe_unused]] const ADSP::CommandListP
} }
void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) { void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) {
std::vector<std::span<const s32>> input_buffers(parameter.channel_count); std::array<std::span<const s32>, MaxChannels> input_buffers{};
std::vector<std::span<s32>> output_buffers(parameter.channel_count); std::array<std::span<s32>, MaxChannels> output_buffers{};
for (u32 i = 0; i < parameter.channel_count; i++) { for (u32 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,

View file

@ -250,8 +250,8 @@ static Common::FixedPoint<50, 14> Axfx2AllPassTick(ReverbInfo::ReverbDelayLine&
*/ */
template <size_t NumChannels> template <size_t NumChannels>
static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state,
std::vector<std::span<const s32>>& inputs, std::span<std::span<const s32>> inputs,
std::vector<std::span<s32>>& outputs, const u32 sample_count) { std::span<std::span<s32>> outputs, const u32 sample_count) {
static constexpr std::array<u8, ReverbInfo::MaxDelayTaps> OutTapIndexes1Ch{ static constexpr std::array<u8, ReverbInfo::MaxDelayTaps> OutTapIndexes1Ch{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}; };
@ -369,8 +369,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever
* @param sample_count - Number of samples to process. * @param sample_count - Number of samples to process.
*/ */
static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state,
const bool enabled, std::vector<std::span<const s32>>& inputs, const bool enabled, std::span<std::span<const s32>> inputs,
std::vector<std::span<s32>>& outputs, const u32 sample_count) { std::span<std::span<s32>> outputs, const u32 sample_count) {
if (enabled) { if (enabled) {
switch (params.channel_count) { switch (params.channel_count) {
case 0: case 0:
@ -412,8 +412,8 @@ void ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proc
} }
void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) {
std::vector<std::span<const s32>> input_buffers(parameter.channel_count); std::array<std::span<const s32>, MaxChannels> input_buffers{};
std::vector<std::span<s32>> output_buffers(parameter.channel_count); std::array<std::span<s32>, MaxChannels> output_buffers{};
for (u32 i = 0; i < parameter.channel_count; i++) { for (u32 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,

View file

@ -24,7 +24,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces
constexpr s32 min{std::numeric_limits<s16>::min()}; constexpr s32 min{std::numeric_limits<s16>::min()};
constexpr s32 max{std::numeric_limits<s16>::max()}; constexpr s32 max{std::numeric_limits<s16>::max()};
std::vector<s16> output(processor.sample_count); std::array<s16, TargetSampleCount * MaxChannels> output{};
for (u32 channel = 0; channel < input_count; channel++) { for (u32 channel = 0; channel < input_count; channel++) {
auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count, auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count,
processor.sample_count)}; processor.sample_count)};
@ -33,7 +33,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces
} }
processor.memory->WriteBlockUnsafe(address + pos, output.data(), processor.memory->WriteBlockUnsafe(address + pos, output.data(),
output.size() * sizeof(s16)); processor.sample_count * sizeof(s16));
pos += static_cast<u32>(processor.sample_count * sizeof(s16)); pos += static_cast<u32>(processor.sample_count * sizeof(s16));
if (pos >= size) { if (pos >= size) {
pos = 0; pos = 0;

View file

@ -33,8 +33,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
.consumed{false}, .consumed{false},
}; };
std::vector<s16> samples(out_buffer.frames * input_count); std::array<s16, TargetSampleCount * MaxChannels> samples{};
for (u32 channel = 0; channel < input_count; channel++) { for (u32 channel = 0; channel < input_count; channel++) {
const auto offset{inputs[channel] * out_buffer.frames}; const auto offset{inputs[channel] * out_buffer.frames};
@ -45,7 +44,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
} }
out_buffer.tag = reinterpret_cast<u64>(samples.data()); out_buffer.tag = reinterpret_cast<u64>(samples.data());
stream->AppendBuffer(out_buffer, samples); stream->AppendBuffer(out_buffer, {samples.data(), out_buffer.frames * input_count});
if (stream->IsPaused()) { if (stream->IsPaused()) {
stream->Start(); stream->Start();

View file

@ -125,10 +125,10 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) {
return false; return false;
} }
std::vector<s32> sorted_results{node_states.GetSortedResuls()}; auto sorted_results{node_states.GetSortedResuls()};
const auto result_size{std::min(count, static_cast<s32>(sorted_results.size()))}; const auto result_size{std::min(count, static_cast<s32>(sorted_results.second))};
for (s32 i = 0; i < result_size; i++) { for (s32 i = 0; i < result_size; i++) {
sorted_mix_infos[i] = &mix_infos[sorted_results[i]]; sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]];
} }
CalcMixBufferOffset(); CalcMixBufferOffset();

View file

@ -134,8 +134,8 @@ u32 NodeStates::GetNodeCount() const {
return node_count; return node_count;
} }
std::vector<s32> NodeStates::GetSortedResuls() const { std::pair<std::span<u32>::reverse_iterator, size_t> NodeStates::GetSortedResuls() const {
return {results.rbegin(), results.rbegin() + result_pos}; return {results.rbegin(), result_pos};
} }
} // namespace AudioCore::AudioRenderer } // namespace AudioCore::AudioRenderer

View file

@ -175,7 +175,7 @@ public:
* *
* @return Vector of nodes in reverse order. * @return Vector of nodes in reverse order.
*/ */
std::vector<s32> GetSortedResuls() const; std::pair<std::span<u32>::reverse_iterator, size_t> GetSortedResuls() const;
private: private:
/// Number of nodes in the graph /// Number of nodes in the graph

View file

@ -444,6 +444,7 @@ Result System::Update(std::span<const u8> input, std::span<u8> performance, std:
std::scoped_lock l{lock}; std::scoped_lock l{lock};
const auto start_time{core.CoreTiming().GetClockTicks()}; const auto start_time{core.CoreTiming().GetClockTicks()};
std::memset(output.data(), 0, output.size());
InfoUpdater info_updater(input, output, process_handle, behavior); InfoUpdater info_updater(input, output, process_handle, behavior);

View file

@ -20,7 +20,7 @@ public:
explicit NullSinkStreamImpl(Core::System& system_, StreamType type_) explicit NullSinkStreamImpl(Core::System& system_, StreamType type_)
: SinkStream{system_, type_} {} : SinkStream{system_, type_} {}
~NullSinkStreamImpl() override {} ~NullSinkStreamImpl() override {}
void AppendBuffer(SinkBuffer&, std::vector<s16>&) override {} void AppendBuffer(SinkBuffer&, std::span<s16>) override {}
std::vector<s16> ReleaseBuffer(u64) override { std::vector<s16> ReleaseBuffer(u64) override {
return {}; return {};
} }

View file

@ -18,7 +18,7 @@
namespace AudioCore::Sink { namespace AudioCore::Sink {
void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) { void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) {
if (type == StreamType::In) { if (type == StreamType::In) {
queue.enqueue(buffer); queue.enqueue(buffer);
queued_buffers++; queued_buffers++;
@ -66,15 +66,16 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
static_cast<s16>(std::clamp(right_sample, min, max)); static_cast<s16>(std::clamp(right_sample, min, max));
} }
samples.resize(samples.size() / system_channels * device_channels); samples = samples.subspan(0, samples.size() / system_channels * device_channels);
} else if (system_channels == 2 && device_channels == 6) { } else if (system_channels == 2 && device_channels == 6) {
// We need moar samples! Not all games will provide 6 channel audio. // We need moar samples! Not all games will provide 6 channel audio.
// TODO: Implement some upmixing here. Currently just passthrough, with other // TODO: Implement some upmixing here. Currently just passthrough, with other
// channels left as silence. // channels left as silence.
std::vector<s16> new_samples(samples.size() / system_channels * device_channels, 0); auto new_size = samples.size() / system_channels * device_channels;
tmp_samples.resize_destructive(new_size);
for (u32 read_index = 0, write_index = 0; read_index < samples.size(); for (u32 read_index = 0, write_index = 0; read_index < new_size;
read_index += system_channels, write_index += device_channels) { read_index += system_channels, write_index += device_channels) {
const auto left_sample{static_cast<s16>(std::clamp( const auto left_sample{static_cast<s16>(std::clamp(
static_cast<s32>( static_cast<s32>(
@ -82,7 +83,7 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
volume), volume),
min, max))}; min, max))};
new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample; tmp_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample;
const auto right_sample{static_cast<s16>(std::clamp( const auto right_sample{static_cast<s16>(std::clamp(
static_cast<s32>( static_cast<s32>(
@ -90,9 +91,9 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
volume), volume),
min, max))}; min, max))};
new_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample; tmp_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample;
} }
samples = std::move(new_samples); samples = std::span<s16>(tmp_samples);
} else if (volume != 1.0f) { } else if (volume != 1.0f) {
for (u32 i = 0; i < samples.size(); i++) { for (u32 i = 0; i < samples.size(); i++) {

View file

@ -16,6 +16,7 @@
#include "common/polyfill_thread.h" #include "common/polyfill_thread.h"
#include "common/reader_writer_queue.h" #include "common/reader_writer_queue.h"
#include "common/ring_buffer.h" #include "common/ring_buffer.h"
#include "common/scratch_buffer.h"
#include "common/thread.h" #include "common/thread.h"
namespace Core { namespace Core {
@ -170,7 +171,7 @@ public:
* @param buffer - Audio buffer information to be queued. * @param buffer - Audio buffer information to be queued.
* @param samples - The s16 samples to be queued for playback. * @param samples - The s16 samples to be queued for playback.
*/ */
virtual void AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples); virtual void AppendBuffer(SinkBuffer& buffer, std::span<s16> samples);
/** /**
* Release a buffer. Audio In only, will fill a buffer with recorded samples. * Release a buffer. Audio In only, will fill a buffer with recorded samples.
@ -255,6 +256,8 @@ private:
/// Signalled when ring buffer entries are consumed /// Signalled when ring buffer entries are consumed
std::condition_variable_any release_cv; std::condition_variable_any release_cv;
std::mutex release_mutex; std::mutex release_mutex;
/// Temporary buffer for appending samples when upmixing
Common::ScratchBuffer<s16> tmp_samples{};
}; };
using SinkStreamPtr = std::unique_ptr<SinkStream>; using SinkStreamPtr = std::unique_ptr<SinkStream>;

View file

@ -9,6 +9,7 @@
#include <cstddef> #include <cstddef>
#include <cstring> #include <cstring>
#include <new> #include <new>
#include <span>
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
@ -53,7 +54,7 @@ public:
return push_count; return push_count;
} }
std::size_t Push(const std::vector<T>& input) { std::size_t Push(const std::span<T> input) {
return Push(input.data(), input.size()); return Push(input.data(), input.size());
} }

View file

@ -3,6 +3,9 @@
#pragma once #pragma once
#include <iterator>
#include "common/concepts.h"
#include "common/make_unique_for_overwrite.h" #include "common/make_unique_for_overwrite.h"
namespace Common { namespace Common {
@ -16,6 +19,12 @@ namespace Common {
template <typename T> template <typename T>
class ScratchBuffer { class ScratchBuffer {
public: public:
using iterator = T*;
using const_iterator = const T*;
using value_type = T;
using element_type = T;
using iterator_category = std::contiguous_iterator_tag;
ScratchBuffer() = default; ScratchBuffer() = default;
explicit ScratchBuffer(size_t initial_capacity) explicit ScratchBuffer(size_t initial_capacity)

View file

@ -3,6 +3,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h"
#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/k_synchronization_object.h"
@ -75,7 +76,7 @@ Result KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
KSynchronizationObject** objects, const s32 num_objects, KSynchronizationObject** objects, const s32 num_objects,
s64 timeout) { s64 timeout) {
// Allocate space on stack for thread nodes. // Allocate space on stack for thread nodes.
std::vector<ThreadListNode> thread_nodes(num_objects); std::array<ThreadListNode, Svc::ArgumentHandleCountMax> thread_nodes;
// Prepare for wait. // Prepare for wait.
KThread* thread = GetCurrentThreadPointer(kernel); KThread* thread = GetCurrentThreadPointer(kernel);

View file

@ -909,7 +909,7 @@ Result KThread::SetActivity(Svc::ThreadActivity activity) {
R_SUCCEED(); R_SUCCEED();
} }
Result KThread::GetThreadContext3(std::vector<u8>& out) { Result KThread::GetThreadContext3(Common::ScratchBuffer<u8>& out) {
// Lock ourselves. // Lock ourselves.
KScopedLightLock lk{m_activity_pause_lock}; KScopedLightLock lk{m_activity_pause_lock};
@ -927,15 +927,13 @@ Result KThread::GetThreadContext3(std::vector<u8>& out) {
// Mask away mode bits, interrupt bits, IL bit, and other reserved bits. // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
auto context = GetContext64(); auto context = GetContext64();
context.pstate &= 0xFF0FFE20; context.pstate &= 0xFF0FFE20;
out.resize_destructive(sizeof(context));
out.resize(sizeof(context));
std::memcpy(out.data(), std::addressof(context), sizeof(context)); std::memcpy(out.data(), std::addressof(context), sizeof(context));
} else { } else {
// Mask away mode bits, interrupt bits, IL bit, and other reserved bits. // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
auto context = GetContext32(); auto context = GetContext32();
context.cpsr &= 0xFF0FFE20; context.cpsr &= 0xFF0FFE20;
out.resize_destructive(sizeof(context));
out.resize(sizeof(context));
std::memcpy(out.data(), std::addressof(context), sizeof(context)); std::memcpy(out.data(), std::addressof(context), sizeof(context));
} }
} }

View file

@ -15,6 +15,7 @@
#include "common/intrusive_list.h" #include "common/intrusive_list.h"
#include "common/intrusive_red_black_tree.h" #include "common/intrusive_red_black_tree.h"
#include "common/scratch_buffer.h"
#include "common/spin_lock.h" #include "common/spin_lock.h"
#include "core/arm/arm_interface.h" #include "core/arm/arm_interface.h"
#include "core/hle/kernel/k_affinity_mask.h" #include "core/hle/kernel/k_affinity_mask.h"
@ -567,7 +568,7 @@ public:
void RemoveWaiter(KThread* thread); void RemoveWaiter(KThread* thread);
Result GetThreadContext3(std::vector<u8>& out); Result GetThreadContext3(Common::ScratchBuffer<u8>& out);
KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) { KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) {
return this->RemoveWaiterByKey(out_has_waiters, key, false); return this->RemoveWaiterByKey(out_has_waiters, key, false);

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "common/scratch_buffer.h"
#include "core/core.h" #include "core/core.h"
#include "core/hle/kernel/k_client_session.h" #include "core/hle/kernel/k_client_session.h"
#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_process.h"
@ -45,11 +46,11 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)), handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)),
ResultInvalidPointer); ResultInvalidPointer);
std::vector<Handle> handles(num_handles); std::array<Handle, Svc::ArgumentHandleCountMax> handles;
GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles); GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles);
// Convert handle list to object table. // Convert handle list to object table.
std::vector<KSynchronizationObject*> objs(num_handles); std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(), R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(),
num_handles), num_handles),
ResultInvalidHandle); ResultInvalidHandle);
@ -80,7 +81,7 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
// Wait for an object. // Wait for an object.
s32 index; s32 index;
Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(), Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(),
static_cast<s32>(objs.size()), timeout_ns); num_handles, timeout_ns);
if (result == ResultTimedOut) { if (result == ResultTimedOut) {
R_RETURN(result); R_RETURN(result);
} }

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "common/scratch_buffer.h"
#include "core/core.h" #include "core/core.h"
#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_readable_event.h"
@ -54,7 +55,7 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
// Get the synchronization context. // Get the synchronization context.
auto& kernel = system.Kernel(); auto& kernel = system.Kernel();
auto& handle_table = GetCurrentProcess(kernel).GetHandleTable(); auto& handle_table = GetCurrentProcess(kernel).GetHandleTable();
std::vector<KSynchronizationObject*> objs(num_handles); std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
// Copy user handles. // Copy user handles.
if (num_handles > 0) { if (num_handles > 0) {
@ -72,8 +73,8 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
}); });
// Wait on the objects. // Wait on the objects.
Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(), Result res =
static_cast<s32>(objs.size()), timeout_ns); KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout_ns);
R_SUCCEED_IF(res == ResultSessionClosed); R_SUCCEED_IF(res == ResultSessionClosed);
R_RETURN(res); R_RETURN(res);
@ -87,8 +88,7 @@ Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_ha
// Ensure number of handles is valid. // Ensure number of handles is valid.
R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange); R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange);
std::array<Handle, Svc::ArgumentHandleCountMax> handles;
std::vector<Handle> handles(num_handles);
if (num_handles > 0) { if (num_handles > 0) {
GetCurrentMemory(system.Kernel()) GetCurrentMemory(system.Kernel())
.ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle)); .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle));

View file

@ -174,7 +174,7 @@ Result GetThreadContext3(Core::System& system, u64 out_context, Handle thread_ha
} }
// Get the thread context. // Get the thread context.
std::vector<u8> context; static thread_local Common::ScratchBuffer<u8> context;
R_TRY(thread->GetThreadContext3(context)); R_TRY(thread->GetThreadContext3(context));
// Copy the thread context to user space. // Copy the thread context to user space.

View file

@ -5,6 +5,7 @@
#include "audio_core/renderer/audio_device.h" #include "audio_core/renderer/audio_device.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/settings.h"
#include "common/string_util.h" #include "common/string_util.h"
#include "core/core.h" #include "core/core.h"
#include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_event.h"
@ -123,19 +124,13 @@ private:
void GetReleasedAudioInBuffer(HLERequestContext& ctx) { void GetReleasedAudioInBuffer(HLERequestContext& ctx) {
const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
std::vector<u64> released_buffers(write_buffer_size); tmp_buffer.resize_destructive(write_buffer_size);
tmp_buffer[0] = 0;
const auto count = impl->GetReleasedBuffers(released_buffers); const auto count = impl->GetReleasedBuffers(tmp_buffer);
[[maybe_unused]] std::string tags{}; ctx.WriteBuffer(tmp_buffer);
for (u32 i = 0; i < count; i++) {
tags += fmt::format("{:08X}, ", released_buffers[i]);
}
[[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()};
LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
tags);
ctx.WriteBuffer(released_buffers);
IPC::ResponseBuilder rb{ctx, 3}; IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess); rb.Push(ResultSuccess);
rb.Push(count); rb.Push(count);
@ -200,6 +195,7 @@ private:
KernelHelpers::ServiceContext service_context; KernelHelpers::ServiceContext service_context;
Kernel::KEvent* event; Kernel::KEvent* event;
std::shared_ptr<AudioCore::AudioIn::In> impl; std::shared_ptr<AudioCore::AudioIn::In> impl;
Common::ScratchBuffer<u64> tmp_buffer;
}; };
AudInU::AudInU(Core::System& system_) AudInU::AudInU(Core::System& system_)

View file

@ -123,19 +123,13 @@ private:
void GetReleasedAudioOutBuffers(HLERequestContext& ctx) { void GetReleasedAudioOutBuffers(HLERequestContext& ctx) {
const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
std::vector<u64> released_buffers(write_buffer_size); tmp_buffer.resize_destructive(write_buffer_size);
tmp_buffer[0] = 0;
const auto count = impl->GetReleasedBuffers(released_buffers); const auto count = impl->GetReleasedBuffers(tmp_buffer);
[[maybe_unused]] std::string tags{}; ctx.WriteBuffer(tmp_buffer);
for (u32 i = 0; i < count; i++) {
tags += fmt::format("{:08X}, ", released_buffers[i]);
}
[[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()};
LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
tags);
ctx.WriteBuffer(released_buffers);
IPC::ResponseBuilder rb{ctx, 3}; IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess); rb.Push(ResultSuccess);
rb.Push(count); rb.Push(count);
@ -211,6 +205,7 @@ private:
KernelHelpers::ServiceContext service_context; KernelHelpers::ServiceContext service_context;
Kernel::KEvent* event; Kernel::KEvent* event;
std::shared_ptr<AudioCore::AudioOut::Out> impl; std::shared_ptr<AudioCore::AudioOut::Out> impl;
Common::ScratchBuffer<u64> tmp_buffer;
}; };
AudOutU::AudOutU(Core::System& system_) AudOutU::AudOutU(Core::System& system_)

View file

@ -116,28 +116,26 @@ private:
// These buffers are written manually to avoid an issue with WriteBuffer throwing errors for // These buffers are written manually to avoid an issue with WriteBuffer throwing errors for
// checking size 0. Performance size is 0 for most games. // checking size 0. Performance size is 0 for most games.
std::vector<u8> output{};
std::vector<u8> performance{};
auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0}; auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0};
if (is_buffer_b) { if (is_buffer_b) {
const auto buffersB{ctx.BufferDescriptorB()}; const auto buffersB{ctx.BufferDescriptorB()};
output.resize(buffersB[0].Size(), 0); tmp_output.resize_destructive(buffersB[0].Size());
performance.resize(buffersB[1].Size(), 0); tmp_performance.resize_destructive(buffersB[1].Size());
} else { } else {
const auto buffersC{ctx.BufferDescriptorC()}; const auto buffersC{ctx.BufferDescriptorC()};
output.resize(buffersC[0].Size(), 0); tmp_output.resize_destructive(buffersC[0].Size());
performance.resize(buffersC[1].Size(), 0); tmp_performance.resize_destructive(buffersC[1].Size());
} }
auto result = impl->RequestUpdate(input, performance, output); auto result = impl->RequestUpdate(input, tmp_performance, tmp_output);
if (result.IsSuccess()) { if (result.IsSuccess()) {
if (is_buffer_b) { if (is_buffer_b) {
ctx.WriteBufferB(output.data(), output.size(), 0); ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0);
ctx.WriteBufferB(performance.data(), performance.size(), 1); ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1);
} else { } else {
ctx.WriteBufferC(output.data(), output.size(), 0); ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0);
ctx.WriteBufferC(performance.data(), performance.size(), 1); ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1);
} }
} else { } else {
LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description); LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description);
@ -235,6 +233,8 @@ private:
Kernel::KEvent* rendered_event; Kernel::KEvent* rendered_event;
Manager& manager; Manager& manager;
std::unique_ptr<Renderer> impl; std::unique_ptr<Renderer> impl;
Common::ScratchBuffer<u8> tmp_output;
Common::ScratchBuffer<u8> tmp_performance;
}; };
class IAudioDevice final : public ServiceFramework<IAudioDevice> { class IAudioDevice final : public ServiceFramework<IAudioDevice> {

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include "audio_core/audio_render_manager.h" #include "audio_core/audio_render_manager.h"
#include "common/scratch_buffer.h"
#include "core/hle/service/kernel_helpers.h" #include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/service.h" #include "core/hle/service/service.h"

View file

@ -68,13 +68,13 @@ private:
ExtraBehavior extra_behavior) { ExtraBehavior extra_behavior) {
u32 consumed = 0; u32 consumed = 0;
u32 sample_count = 0; u32 sample_count = 0;
std::vector<opus_int16> samples(ctx.GetWriteBufferNumElements<opus_int16>()); tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>());
if (extra_behavior == ExtraBehavior::ResetContext) { if (extra_behavior == ExtraBehavior::ResetContext) {
ResetDecoderContext(); ResetDecoderContext();
} }
if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) {
LOG_ERROR(Audio, "Failed to decode opus data"); LOG_ERROR(Audio, "Failed to decode opus data");
IPC::ResponseBuilder rb{ctx, 2}; IPC::ResponseBuilder rb{ctx, 2};
// TODO(ogniK): Use correct error code // TODO(ogniK): Use correct error code
@ -90,11 +90,11 @@ private:
if (performance) { if (performance) {
rb.Push<u64>(*performance); rb.Push<u64>(*performance);
} }
ctx.WriteBuffer(samples); ctx.WriteBuffer(tmp_samples);
} }
bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input, bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input,
std::vector<opus_int16>& output, u64* out_performance_time) const { std::span<opus_int16> output, u64* out_performance_time) const {
const auto start_time = std::chrono::steady_clock::now(); const auto start_time = std::chrono::steady_clock::now();
const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
if (sizeof(OpusPacketHeader) > input.size()) { if (sizeof(OpusPacketHeader) > input.size()) {
@ -154,6 +154,7 @@ private:
OpusDecoderPtr decoder; OpusDecoderPtr decoder;
u32 sample_rate; u32 sample_rate;
u32 channel_count; u32 channel_count;
Common::ScratchBuffer<opus_int16> tmp_samples;
}; };
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {

View file

@ -34,7 +34,7 @@ public:
* @returns The result code of the ioctl. * @returns The result code of the ioctl.
*/ */
virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) = 0; std::span<u8> output) = 0;
/** /**
* Handles an ioctl2 request. * Handles an ioctl2 request.
@ -45,7 +45,7 @@ public:
* @returns The result code of the ioctl. * @returns The result code of the ioctl.
*/ */
virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) = 0; std::span<const u8> inline_input, std::span<u8> output) = 0;
/** /**
* Handles an ioctl3 request. * Handles an ioctl3 request.
@ -56,7 +56,7 @@ public:
* @returns The result code of the ioctl. * @returns The result code of the ioctl.
*/ */
virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_output) = 0; std::span<u8> output, std::span<u8> inline_output) = 0;
/** /**
* Called once a device is opened * Called once a device is opened

View file

@ -18,19 +18,19 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core)
nvdisp_disp0::~nvdisp_disp0() = default; nvdisp_disp0::~nvdisp_disp0() = default;
NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }

View file

@ -26,11 +26,11 @@ public:
~nvdisp_disp0() override; ~nvdisp_disp0() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;

View file

@ -28,7 +28,7 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Con
nvhost_as_gpu::~nvhost_as_gpu() = default; nvhost_as_gpu::~nvhost_as_gpu() = default;
NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 'A': case 'A':
switch (command.cmd) { switch (command.cmd) {
@ -61,13 +61,13 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i
} }
NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> output, std::span<u8> inline_output) {
switch (command.group) { switch (command.group) {
case 'A': case 'A':
switch (command.cmd) { switch (command.cmd) {
@ -87,7 +87,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
void nvhost_as_gpu::OnOpen(DeviceFD fd) {} void nvhost_as_gpu::OnOpen(DeviceFD fd) {}
void nvhost_as_gpu::OnClose(DeviceFD fd) {} void nvhost_as_gpu::OnClose(DeviceFD fd) {}
NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::span<u8> output) {
IoctlAllocAsEx params{}; IoctlAllocAsEx params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
@ -141,7 +141,7 @@ NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& ou
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::span<u8> output) {
IoctlAllocSpace params{}; IoctlAllocSpace params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
@ -220,7 +220,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
mapping_map.erase(offset); mapping_map.erase(offset);
} }
NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::span<u8> output) {
IoctlFreeSpace params{}; IoctlFreeSpace params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
@ -266,15 +266,14 @@ NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& ou
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::span<u8> output) {
const auto num_entries = input.size() / sizeof(IoctlRemapEntry); const auto num_entries = input.size() / sizeof(IoctlRemapEntry);
LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
std::vector<IoctlRemapEntry> entries(num_entries);
std::memcpy(entries.data(), input.data(), input.size());
std::scoped_lock lock(mutex); std::scoped_lock lock(mutex);
entries.resize_destructive(num_entries);
std::memcpy(entries.data(), input.data(), input.size());
if (!vm.initialised) { if (!vm.initialised) {
return NvResult::BadValue; return NvResult::BadValue;
@ -320,7 +319,7 @@ NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::span<u8> output) {
IoctlMapBufferEx params{}; IoctlMapBufferEx params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
@ -424,7 +423,7 @@ NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>&
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
IoctlUnmapBuffer params{}; IoctlUnmapBuffer params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
@ -463,7 +462,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>&
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::span<u8> output) {
IoctlBindChannel params{}; IoctlBindChannel params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
@ -492,7 +491,7 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
}; };
} }
NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output) {
IoctlGetVaRegions params{}; IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
@ -511,8 +510,8 @@ NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>&
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output, NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) { std::span<u8> inline_output) {
IoctlGetVaRegions params{}; IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());

View file

@ -15,6 +15,7 @@
#include "common/address_space.h" #include "common/address_space.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h"
#include "common/swap.h" #include "common/swap.h"
#include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/devices/nvdevice.h"
@ -48,11 +49,11 @@ public:
~nvhost_as_gpu() override; ~nvhost_as_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;
@ -138,18 +139,18 @@ private:
static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2, static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
"IoctlGetVaRegions is incorrect size"); "IoctlGetVaRegions is incorrect size");
NvResult AllocAsEx(std::span<const u8> input, std::vector<u8>& output); NvResult AllocAsEx(std::span<const u8> input, std::span<u8> output);
NvResult AllocateSpace(std::span<const u8> input, std::vector<u8>& output); NvResult AllocateSpace(std::span<const u8> input, std::span<u8> output);
NvResult Remap(std::span<const u8> input, std::vector<u8>& output); NvResult Remap(std::span<const u8> input, std::span<u8> output);
NvResult MapBufferEx(std::span<const u8> input, std::vector<u8>& output); NvResult MapBufferEx(std::span<const u8> input, std::span<u8> output);
NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output); NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
NvResult FreeSpace(std::span<const u8> input, std::vector<u8>& output); NvResult FreeSpace(std::span<const u8> input, std::span<u8> output);
NvResult BindChannel(std::span<const u8> input, std::vector<u8>& output); NvResult BindChannel(std::span<const u8> input, std::span<u8> output);
void GetVARegionsImpl(IoctlGetVaRegions& params); void GetVARegionsImpl(IoctlGetVaRegions& params);
NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output); NvResult GetVARegions(std::span<const u8> input, std::span<u8> output);
NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output, NvResult GetVARegions(std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output); std::span<u8> inline_output);
void FreeMappingLocked(u64 offset); void FreeMappingLocked(u64 offset);
@ -212,6 +213,7 @@ private:
bool initialised{}; bool initialised{};
} vm; } vm;
std::shared_ptr<Tegra::MemoryManager> gmmu; std::shared_ptr<Tegra::MemoryManager> gmmu;
Common::ScratchBuffer<IoctlRemapEntry> entries;
// s32 channel{}; // s32 channel{};
// u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};

View file

@ -35,7 +35,7 @@ nvhost_ctrl::~nvhost_ctrl() {
} }
NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 0x0: case 0x0:
switch (command.cmd) { switch (command.cmd) {
@ -64,13 +64,13 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inp
} }
NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_outpu) { std::span<u8> output, std::span<u8> inline_outpu) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
@ -79,7 +79,7 @@ void nvhost_ctrl::OnOpen(DeviceFD fd) {}
void nvhost_ctrl::OnClose(DeviceFD fd) {} void nvhost_ctrl::OnClose(DeviceFD fd) {}
NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output) {
IocGetConfigParams params{}; IocGetConfigParams params{};
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(), LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(),
@ -87,7 +87,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8
return NvResult::ConfigVarNotFound; // Returns error on production mode return NvResult::ConfigVarNotFound; // Returns error on production mode
} }
NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output, NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::span<u8> output,
bool is_allocation) { bool is_allocation) {
IocCtrlEventWaitParams params{}; IocCtrlEventWaitParams params{};
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
@ -231,7 +231,7 @@ NvResult nvhost_ctrl::FreeEvent(u32 slot) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output) {
IocCtrlEventRegisterParams params{}; IocCtrlEventRegisterParams params{};
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
const u32 event_id = params.user_event_id; const u32 event_id = params.user_event_id;
@ -252,7 +252,7 @@ NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vecto
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output) {
IocCtrlEventUnregisterParams params{}; IocCtrlEventUnregisterParams params{};
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
const u32 event_id = params.user_event_id & 0x00FF; const u32 event_id = params.user_event_id & 0x00FF;
@ -262,8 +262,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vec
return FreeEvent(event_id); return FreeEvent(event_id);
} }
NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output) {
std::vector<u8>& output) {
IocCtrlEventUnregisterBatchParams params{}; IocCtrlEventUnregisterBatchParams params{};
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
u64 event_mask = params.user_events; u64 event_mask = params.user_events;
@ -281,7 +280,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input,
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output) {
IocCtrlEventClearParams params{}; IocCtrlEventClearParams params{};
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));

View file

@ -26,11 +26,11 @@ public:
~nvhost_ctrl() override; ~nvhost_ctrl() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;
@ -186,13 +186,12 @@ private:
static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8, static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8,
"IocCtrlEventKill is incorrect size"); "IocCtrlEventKill is incorrect size");
NvResult NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output); NvResult NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output);
NvResult IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output, NvResult IocCtrlEventWait(std::span<const u8> input, std::span<u8> output, bool is_allocation);
bool is_allocation); NvResult IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output);
NvResult IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output); NvResult IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output);
NvResult IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output); NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output);
NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::vector<u8>& output); NvResult IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output);
NvResult IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output);
NvResult FreeEvent(u32 slot); NvResult FreeEvent(u32 slot);

View file

@ -22,7 +22,7 @@ nvhost_ctrl_gpu::~nvhost_ctrl_gpu() {
} }
NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 'G': case 'G':
switch (command.cmd) { switch (command.cmd) {
@ -54,13 +54,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8>
} }
NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> output, std::span<u8> inline_output) {
switch (command.group) { switch (command.group) {
case 'G': case 'G':
switch (command.cmd) { switch (command.cmd) {
@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8>
void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {} void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {}
void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {}
NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
IoctlCharacteristics params{}; IoctlCharacteristics params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
@ -127,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output, NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) { std::span<u8> inline_output) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
IoctlCharacteristics params{}; IoctlCharacteristics params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
@ -175,7 +175,7 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output) {
IoctlGpuGetTpcMasksArgs params{}; IoctlGpuGetTpcMasksArgs params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
@ -186,8 +186,8 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output, NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) { std::span<u8> inline_output) {
IoctlGpuGetTpcMasksArgs params{}; IoctlGpuGetTpcMasksArgs params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
@ -199,7 +199,7 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
IoctlActiveSlotMask params{}; IoctlActiveSlotMask params{};
@ -212,7 +212,7 @@ NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vect
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
IoctlZcullGetCtxSize params{}; IoctlZcullGetCtxSize params{};
@ -224,7 +224,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
IoctlNvgpuGpuZcullGetInfoArgs params{}; IoctlNvgpuGpuZcullGetInfoArgs params{};
@ -247,7 +247,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::span<u8> output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called"); LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlZbcSetTable params{}; IoctlZbcSetTable params{};
@ -263,7 +263,7 @@ NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::span<u8> output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called"); LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlZbcQueryTable params{}; IoctlZbcQueryTable params{};
@ -273,7 +273,7 @@ NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::span<u8> output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called"); LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlFlushL2 params{}; IoctlFlushL2 params{};
@ -283,7 +283,7 @@ NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& ou
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
IoctlGetGpuTime params{}; IoctlGetGpuTime params{};

View file

@ -22,11 +22,11 @@ public:
~nvhost_ctrl_gpu() override; ~nvhost_ctrl_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;
@ -151,21 +151,21 @@ private:
}; };
static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size"); static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size");
NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output); NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output);
NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output, NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output); std::span<u8> inline_output);
NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output); NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output);
NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output, NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output); std::span<u8> inline_output);
NvResult GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output); NvResult GetActiveSlotMask(std::span<const u8> input, std::span<u8> output);
NvResult ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output); NvResult ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output);
NvResult ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output); NvResult ZCullGetInfo(std::span<const u8> input, std::span<u8> output);
NvResult ZBCSetTable(std::span<const u8> input, std::vector<u8>& output); NvResult ZBCSetTable(std::span<const u8> input, std::span<u8> output);
NvResult ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output); NvResult ZBCQueryTable(std::span<const u8> input, std::span<u8> output);
NvResult FlushL2(std::span<const u8> input, std::vector<u8>& output); NvResult FlushL2(std::span<const u8> input, std::span<u8> output);
NvResult GetGpuTime(std::span<const u8> input, std::vector<u8>& output); NvResult GetGpuTime(std::span<const u8> input, std::span<u8> output);
EventInterface& events_interface; EventInterface& events_interface;

View file

@ -47,7 +47,7 @@ nvhost_gpu::~nvhost_gpu() {
} }
NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 0x0: case 0x0:
switch (command.cmd) { switch (command.cmd) {
@ -99,7 +99,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
}; };
NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 'H': case 'H':
switch (command.cmd) { switch (command.cmd) {
@ -113,7 +113,7 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> inpu
} }
NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
@ -121,7 +121,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
void nvhost_gpu::OnOpen(DeviceFD fd) {} void nvhost_gpu::OnOpen(DeviceFD fd) {}
void nvhost_gpu::OnClose(DeviceFD fd) {} void nvhost_gpu::OnClose(DeviceFD fd) {}
NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) {
IoctlSetNvmapFD params{}; IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
@ -130,7 +130,7 @@ NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& outp
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
IoctlClientData params{}; IoctlClientData params{};
@ -139,7 +139,7 @@ NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& o
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
IoctlClientData params{}; IoctlClientData params{};
@ -149,7 +149,7 @@ NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& o
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::span<u8> output) {
std::memcpy(&zcull_params, input.data(), input.size()); std::memcpy(&zcull_params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va, LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va,
zcull_params.mode); zcull_params.mode);
@ -158,7 +158,7 @@ NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& outpu
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::span<u8> output) {
IoctlSetErrorNotifier params{}; IoctlSetErrorNotifier params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset, LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset,
@ -168,14 +168,14 @@ NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::span<u8> output) {
std::memcpy(&channel_priority, input.data(), input.size()); std::memcpy(&channel_priority, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority); LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority);
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output) {
IoctlAllocGpfifoEx2 params{}; IoctlAllocGpfifoEx2 params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, LOG_WARNING(Service_NVDRV,
@ -197,7 +197,7 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>&
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::span<u8> output) {
IoctlAllocObjCtx params{}; IoctlAllocObjCtx params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num, LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num,
@ -208,7 +208,8 @@ NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vecto
return NvResult::Success; return NvResult::Success;
} }
static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { static boost::container::small_vector<Tegra::CommandHeader, 512> BuildWaitCommandList(
NvFence fence) {
return { return {
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
Tegra::SubmissionMode::Increasing), Tegra::SubmissionMode::Increasing),
@ -219,35 +220,35 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
}; };
} }
static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence) { static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementCommandList(
std::vector<Tegra::CommandHeader> result{ NvFence fence) {
boost::container::small_vector<Tegra::CommandHeader, 512> result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
Tegra::SubmissionMode::Increasing), Tegra::SubmissionMode::Increasing),
{}}; {}};
for (u32 count = 0; count < 2; ++count) { for (u32 count = 0; count < 2; ++count) {
result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, result.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
Tegra::SubmissionMode::Increasing)); Tegra::SubmissionMode::Increasing));
result.emplace_back( result.push_back(
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id)); BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
} }
return result; return result;
} }
static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence) { static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementWithWfiCommandList(
std::vector<Tegra::CommandHeader> result{ NvFence fence) {
boost::container::small_vector<Tegra::CommandHeader, 512> result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
Tegra::SubmissionMode::Increasing), Tegra::SubmissionMode::Increasing),
{}}; {}};
const std::vector<Tegra::CommandHeader> increment{BuildIncrementCommandList(fence)}; auto increment_list{BuildIncrementCommandList(fence)};
result.insert(result.end(), increment_list.begin(), increment_list.end());
result.insert(result.end(), increment.begin(), increment.end());
return result; return result;
} }
NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output,
Tegra::CommandList&& entries) { Tegra::CommandList&& entries) {
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
params.num_entries, params.flags.raw); params.num_entries, params.flags.raw);
@ -293,7 +294,7 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output, NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output,
bool kickoff) { bool kickoff) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) { if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED(); UNIMPLEMENTED();
@ -315,7 +316,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>
} }
NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline, NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline,
std::vector<u8>& output) { std::span<u8> output) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) { if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED(); UNIMPLEMENTED();
return NvResult::InvalidSize; return NvResult::InvalidSize;
@ -327,7 +328,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const
return SubmitGPFIFOImpl(params, output, std::move(entries)); return SubmitGPFIFOImpl(params, output, std::move(entries));
} }
NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::span<u8> output) {
IoctlGetWaitbase params{}; IoctlGetWaitbase params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
@ -337,7 +338,7 @@ NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& out
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::span<u8> output) {
IoctlChannelSetTimeout params{}; IoctlChannelSetTimeout params{};
std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout)); std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout));
LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout); LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout);
@ -345,7 +346,7 @@ NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output) {
IoctlSetTimeslice params{}; IoctlSetTimeslice params{};
std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice)); std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice));
LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);

View file

@ -41,11 +41,11 @@ public:
~nvhost_gpu() override; ~nvhost_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;
@ -186,23 +186,23 @@ private:
u32_le channel_priority{}; u32_le channel_priority{};
u32_le channel_timeslice{}; u32_le channel_timeslice{};
NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output); NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output);
NvResult SetClientData(std::span<const u8> input, std::vector<u8>& output); NvResult SetClientData(std::span<const u8> input, std::span<u8> output);
NvResult GetClientData(std::span<const u8> input, std::vector<u8>& output); NvResult GetClientData(std::span<const u8> input, std::span<u8> output);
NvResult ZCullBind(std::span<const u8> input, std::vector<u8>& output); NvResult ZCullBind(std::span<const u8> input, std::span<u8> output);
NvResult SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output); NvResult SetErrorNotifier(std::span<const u8> input, std::span<u8> output);
NvResult SetChannelPriority(std::span<const u8> input, std::vector<u8>& output); NvResult SetChannelPriority(std::span<const u8> input, std::span<u8> output);
NvResult AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output); NvResult AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output);
NvResult AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output); NvResult AllocateObjectContext(std::span<const u8> input, std::span<u8> output);
NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output,
Tegra::CommandList&& entries); Tegra::CommandList&& entries);
NvResult SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output, NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output,
bool kickoff = false); bool kickoff = false);
NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline, NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline,
std::vector<u8>& output); std::span<u8> output);
NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output); NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output);
NvResult ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output); NvResult ChannelSetTimeout(std::span<const u8> input, std::span<u8> output);
NvResult ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output); NvResult ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output);
EventInterface& events_interface; EventInterface& events_interface;
NvCore::Container& core; NvCore::Container& core;

View file

@ -16,7 +16,7 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_)
nvhost_nvdec::~nvhost_nvdec() = default; nvhost_nvdec::~nvhost_nvdec() = default;
NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 0x0: case 0x0:
switch (command.cmd) { switch (command.cmd) {
@ -56,13 +56,13 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
} }
NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }

View file

@ -14,11 +14,11 @@ public:
~nvhost_nvdec() override; ~nvhost_nvdec() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;

View file

@ -36,7 +36,7 @@ std::size_t SliceVectors(std::span<const u8> input, std::vector<T>& dst, std::si
// Writes the data in src to an offset into the dst vector. The offset is specified in bytes // Writes the data in src to an offset into the dst vector. The offset is specified in bytes
// Returns the number of bytes written into dst. // Returns the number of bytes written into dst.
template <typename T> template <typename T>
std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size_t offset) {
if (src.empty()) { if (src.empty()) {
return 0; return 0;
} }
@ -72,8 +72,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(std::span<const u8> input) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output) {
std::vector<u8>& output) {
IoctlSubmit params{}; IoctlSubmit params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
@ -121,7 +120,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input,
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::span<u8> output) {
IoctlGetSyncpoint params{}; IoctlGetSyncpoint params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
@ -133,7 +132,7 @@ NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vecto
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::span<u8> output) {
IoctlGetWaitbase params{}; IoctlGetWaitbase params{};
LOG_CRITICAL(Service_NVDRV, "called WAITBASE"); LOG_CRITICAL(Service_NVDRV, "called WAITBASE");
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
@ -142,7 +141,7 @@ NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::span<u8> output) {
IoctlMapBuffer params{}; IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@ -159,7 +158,7 @@ NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
IoctlMapBuffer params{}; IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@ -173,7 +172,7 @@ NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::span<u8> output) {
std::memcpy(&submit_timeout, input.data(), input.size()); std::memcpy(&submit_timeout, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called"); LOG_WARNING(Service_NVDRV, "(STUBBED) called");
return NvResult::Success; return NvResult::Success;

View file

@ -108,12 +108,12 @@ protected:
/// Ioctl command implementations /// Ioctl command implementations
NvResult SetNVMAPfd(std::span<const u8> input); NvResult SetNVMAPfd(std::span<const u8> input);
NvResult Submit(DeviceFD fd, std::span<const u8> input, std::vector<u8>& output); NvResult Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output);
NvResult GetSyncpoint(std::span<const u8> input, std::vector<u8>& output); NvResult GetSyncpoint(std::span<const u8> input, std::span<u8> output);
NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output); NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output);
NvResult MapBuffer(std::span<const u8> input, std::vector<u8>& output); NvResult MapBuffer(std::span<const u8> input, std::span<u8> output);
NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output); NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
NvResult SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output); NvResult SetSubmitTimeout(std::span<const u8> input, std::span<u8> output);
Kernel::KEvent* QueryEvent(u32 event_id) override; Kernel::KEvent* QueryEvent(u32 event_id) override;

View file

@ -13,7 +13,7 @@ nvhost_nvjpg::nvhost_nvjpg(Core::System& system_) : nvdevice{system_} {}
nvhost_nvjpg::~nvhost_nvjpg() = default; nvhost_nvjpg::~nvhost_nvjpg() = default;
NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 'H': case 'H':
switch (command.cmd) { switch (command.cmd) {
@ -32,13 +32,13 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
} }
NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
@ -46,7 +46,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
void nvhost_nvjpg::OnOpen(DeviceFD fd) {} void nvhost_nvjpg::OnOpen(DeviceFD fd) {}
void nvhost_nvjpg::OnClose(DeviceFD fd) {} void nvhost_nvjpg::OnClose(DeviceFD fd) {}
NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) { NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) {
IoctlSetNvmapFD params{}; IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);

View file

@ -16,11 +16,11 @@ public:
~nvhost_nvjpg() override; ~nvhost_nvjpg() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;
@ -33,7 +33,7 @@ private:
s32_le nvmap_fd{}; s32_le nvmap_fd{};
NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output); NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output);
}; };
} // namespace Service::Nvidia::Devices } // namespace Service::Nvidia::Devices

View file

@ -16,7 +16,7 @@ nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_)
nvhost_vic::~nvhost_vic() = default; nvhost_vic::~nvhost_vic() = default;
NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 0x0: case 0x0:
switch (command.cmd) { switch (command.cmd) {
@ -56,13 +56,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
} }
NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }

View file

@ -13,11 +13,11 @@ public:
~nvhost_vic(); ~nvhost_vic();
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;

View file

@ -26,7 +26,7 @@ nvmap::nvmap(Core::System& system_, NvCore::Container& container_)
nvmap::~nvmap() = default; nvmap::~nvmap() = default;
NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
switch (command.group) { switch (command.group) {
case 0x1: case 0x1:
switch (command.cmd) { switch (command.cmd) {
@ -55,13 +55,13 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
} }
NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented; return NvResult::NotImplemented;
} }
@ -69,7 +69,7 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
void nvmap::OnOpen(DeviceFD fd) {} void nvmap::OnOpen(DeviceFD fd) {}
void nvmap::OnClose(DeviceFD fd) {} void nvmap::OnClose(DeviceFD fd) {}
NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) { NvResult nvmap::IocCreate(std::span<const u8> input, std::span<u8> output) {
IocCreateParams params; IocCreateParams params;
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size);
@ -89,7 +89,7 @@ NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) { NvResult nvmap::IocAlloc(std::span<const u8> input, std::span<u8> output) {
IocAllocParams params; IocAllocParams params;
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address);
@ -137,7 +137,7 @@ NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) {
return result; return result;
} }
NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) { NvResult nvmap::IocGetId(std::span<const u8> input, std::span<u8> output) {
IocGetIdParams params; IocGetIdParams params;
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
@ -161,7 +161,7 @@ NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) { NvResult nvmap::IocFromId(std::span<const u8> input, std::span<u8> output) {
IocFromIdParams params; IocFromIdParams params;
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));
@ -192,7 +192,7 @@ NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) { NvResult nvmap::IocParam(std::span<const u8> input, std::span<u8> output) {
enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 }; enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 };
IocParamParams params; IocParamParams params;
@ -241,7 +241,7 @@ NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvmap::IocFree(std::span<const u8> input, std::vector<u8>& output) { NvResult nvmap::IocFree(std::span<const u8> input, std::span<u8> output) {
IocFreeParams params; IocFreeParams params;
std::memcpy(&params, input.data(), sizeof(params)); std::memcpy(&params, input.data(), sizeof(params));

View file

@ -27,11 +27,11 @@ public:
nvmap& operator=(const nvmap&) = delete; nvmap& operator=(const nvmap&) = delete;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) override; std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) override; std::span<const u8> inline_input, std::span<u8> output) override;
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output) override; std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override; void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;
@ -106,12 +106,12 @@ private:
}; };
static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
NvResult IocCreate(std::span<const u8> input, std::vector<u8>& output); NvResult IocCreate(std::span<const u8> input, std::span<u8> output);
NvResult IocAlloc(std::span<const u8> input, std::vector<u8>& output); NvResult IocAlloc(std::span<const u8> input, std::span<u8> output);
NvResult IocGetId(std::span<const u8> input, std::vector<u8>& output); NvResult IocGetId(std::span<const u8> input, std::span<u8> output);
NvResult IocFromId(std::span<const u8> input, std::vector<u8>& output); NvResult IocFromId(std::span<const u8> input, std::span<u8> output);
NvResult IocParam(std::span<const u8> input, std::vector<u8>& output); NvResult IocParam(std::span<const u8> input, std::span<u8> output);
NvResult IocFree(std::span<const u8> input, std::vector<u8>& output); NvResult IocFree(std::span<const u8> input, std::span<u8> output);
NvCore::Container& container; NvCore::Container& container;
NvCore::NvMap& file; NvCore::NvMap& file;

View file

@ -130,7 +130,7 @@ DeviceFD Module::Open(const std::string& device_name) {
} }
NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::vector<u8>& output) { std::span<u8> output) {
if (fd < 0) { if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState; return NvResult::InvalidState;
@ -147,7 +147,7 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
} }
NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output) { std::span<const u8> inline_input, std::span<u8> output) {
if (fd < 0) { if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState; return NvResult::InvalidState;
@ -163,8 +163,8 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
return itr->second->Ioctl2(fd, command, input, inline_input, output); return itr->second->Ioctl2(fd, command, input, inline_input, output);
} }
NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& output, std::vector<u8>& inline_output) { std::span<u8> inline_output) {
if (fd < 0) { if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState; return NvResult::InvalidState;

View file

@ -80,13 +80,13 @@ public:
DeviceFD Open(const std::string& device_name); DeviceFD Open(const std::string& device_name);
/// Sends an ioctl command to the specified file descriptor. /// Sends an ioctl command to the specified file descriptor.
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output); NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output);
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
std::span<const u8> inline_input, std::vector<u8>& output); std::span<const u8> inline_input, std::span<u8> output);
NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
std::vector<u8>& inline_output); std::span<u8> inline_output);
/// Closes a device file descriptor and returns operation success. /// Closes a device file descriptor and returns operation success.
NvResult Close(DeviceFD fd); NvResult Close(DeviceFD fd);

View file

@ -63,12 +63,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) {
} }
// Check device // Check device
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
const auto input_buffer = ctx.ReadBuffer(0); const auto input_buffer = ctx.ReadBuffer(0);
const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output);
if (command.is_out != 0) { if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer); ctx.WriteBuffer(tmp_output);
} }
IPC::ResponseBuilder rb{ctx, 3}; IPC::ResponseBuilder rb{ctx, 3};
@ -90,12 +90,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) {
const auto input_buffer = ctx.ReadBuffer(0); const auto input_buffer = ctx.ReadBuffer(0);
const auto input_inlined_buffer = ctx.ReadBuffer(1); const auto input_inlined_buffer = ctx.ReadBuffer(1);
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
const auto nv_result = const auto nv_result =
nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output);
if (command.is_out != 0) { if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer); ctx.WriteBuffer(tmp_output);
} }
IPC::ResponseBuilder rb{ctx, 3}; IPC::ResponseBuilder rb{ctx, 3};
@ -116,14 +116,12 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) {
} }
const auto input_buffer = ctx.ReadBuffer(0); const auto input_buffer = ctx.ReadBuffer(0);
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1)); tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1));
const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline);
const auto nv_result =
nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
if (command.is_out != 0) { if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer, 0); ctx.WriteBuffer(tmp_output, 0);
ctx.WriteBuffer(output_buffer_inline, 1); ctx.WriteBuffer(tmp_output_inline, 1);
} }
IPC::ResponseBuilder rb{ctx, 3}; IPC::ResponseBuilder rb{ctx, 3};

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include "common/scratch_buffer.h"
#include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/service.h" #include "core/hle/service/service.h"
@ -33,6 +34,8 @@ private:
u64 pid{}; u64 pid{};
bool is_initialized{}; bool is_initialized{};
Common::ScratchBuffer<u8> tmp_output;
Common::ScratchBuffer<u8> tmp_output_inline;
}; };
} // namespace Service::Nvidia } // namespace Service::Nvidia

View file

@ -6,6 +6,7 @@
#include <memory> #include <memory>
#include <span> #include <span>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
@ -167,7 +168,7 @@ public:
private: private:
template <typename T> template <typename T>
requires(std::is_trivially_copyable_v<T>) requires(std::is_trivially_copyable_v<T>)
void WriteImpl(const T& val, std::vector<u8>& buffer) { void WriteImpl(const T& val, boost::container::small_vector<u8, 0x200>& buffer) {
const size_t aligned_size = Common::AlignUp(sizeof(T), 4); const size_t aligned_size = Common::AlignUp(sizeof(T), 4);
const size_t old_size = buffer.size(); const size_t old_size = buffer.size();
buffer.resize(old_size + aligned_size); buffer.resize(old_size + aligned_size);
@ -176,8 +177,8 @@ private:
} }
private: private:
std::vector<u8> m_data_buffer; boost::container::small_vector<u8, 0x200> m_data_buffer;
std::vector<u8> m_object_buffer; boost::container::small_vector<u8, 0x200> m_object_buffer;
}; };
} // namespace Service::android } // namespace Service::android

View file

@ -479,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
const u32 remainder{4 - element}; const u32 remainder{4 - element};
const TransformFeedbackVarying* xfb_varying{}; const TransformFeedbackVarying* xfb_varying{};
const size_t xfb_varying_index{base_index + element}; const size_t xfb_varying_index{base_index + element};
if (xfb_varying_index < runtime_info.xfb_varyings.size()) { if (xfb_varying_index < runtime_info.xfb_count) {
xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index];
xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
} }

View file

@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
} }
void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
if (ctx.runtime_info.xfb_varyings.empty()) { if (ctx.runtime_info.xfb_count == 0) {
return; return;
} }
ctx.AddCapability(spv::Capability::TransformFeedback); ctx.AddCapability(spv::Capability::TransformFeedback);

View file

@ -160,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo
const u32 remainder{4 - element}; const u32 remainder{4 - element};
const TransformFeedbackVarying* xfb_varying{}; const TransformFeedbackVarying* xfb_varying{};
const size_t xfb_varying_index{base_attr_index + element}; const size_t xfb_varying_index{base_attr_index + element};
if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) { if (xfb_varying_index < ctx.runtime_info.xfb_count) {
xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index];
xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
} }

View file

@ -84,7 +84,8 @@ struct RuntimeInfo {
bool glasm_use_storage_buffers{}; bool glasm_use_storage_buffers{};
/// Transform feedback state for each varying /// Transform feedback state for each varying
std::vector<TransformFeedbackVarying> xfb_varyings; std::array<TransformFeedbackVarying, 256> xfb_varyings{};
u32 xfb_count{0};
}; };
} // namespace Shader } // namespace Shader

View file

@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
if (has_new_downloads) { if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
} }
tmp_buffer.resize(amount); tmp_buffer.resize_destructive(amount);
cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
return true; return true;
@ -1279,7 +1279,7 @@ template <class P>
typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
u32 wanted_size) { u32 wanted_size) {
static constexpr int STREAM_LEAP_THRESHOLD = 16; static constexpr int STREAM_LEAP_THRESHOLD = 16;
std::vector<BufferId> overlap_ids; boost::container::small_vector<BufferId, 16> overlap_ids;
VAddr begin = cpu_addr; VAddr begin = cpu_addr;
VAddr end = cpu_addr + wanted_size; VAddr end = cpu_addr + wanted_size;
int stream_score = 0; int stream_score = 0;

View file

@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
struct OverlapResult { struct OverlapResult {
std::vector<BufferId> ids; boost::container::small_vector<BufferId, 16> ids;
VAddr begin; VAddr begin;
VAddr end; VAddr end;
bool has_stream_leap = false; bool has_stream_leap = false;
@ -582,7 +582,7 @@ private:
BufferId inline_buffer_id; BufferId inline_buffer_id;
std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
std::vector<u8> tmp_buffer; Common::ScratchBuffer<u8> tmp_buffer;
}; };
} // namespace VideoCommon } // namespace VideoCommon

View file

@ -63,7 +63,6 @@ struct ChCommand {
}; };
using ChCommandHeaderList = std::vector<ChCommandHeader>; using ChCommandHeaderList = std::vector<ChCommandHeader>;
using ChCommandList = std::vector<ChCommand>;
struct ThiRegisters { struct ThiRegisters {
u32_le increment_syncpt{}; u32_le increment_syncpt{};

View file

@ -6,6 +6,7 @@
#include <array> #include <array>
#include <span> #include <span>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp>
#include <queue> #include <queue>
#include "common/bit_field.h" #include "common/bit_field.h"
@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
struct CommandList final { struct CommandList final {
CommandList() = default; CommandList() = default;
explicit CommandList(std::size_t size) : command_lists(size) {} explicit CommandList(std::size_t size) : command_lists(size) {}
explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_) explicit CommandList(
boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
: prefetch_command_list{std::move(prefetch_command_list_)} {} : prefetch_command_list{std::move(prefetch_command_list_)} {}
std::vector<CommandListHeader> command_lists; boost::container::small_vector<CommandListHeader, 512> command_lists;
std::vector<CommandHeader> prefetch_command_list; boost::container::small_vector<CommandHeader, 512> prefetch_command_list;
}; };
/** /**

View file

@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
ASSERT(regs.remap_const.component_size_minus_one == 3); ASSERT(regs.remap_const.component_size_minus_one == 3);
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
std::ranges::fill(span, regs.remap_consta_value);
memory_manager.WriteBlockUnsafe(regs.offset_out, memory_manager.WriteBlockUnsafe(regs.offset_out,
reinterpret_cast<u8*>(tmp_buffer.data()), reinterpret_cast<u8*>(read_buffer.data()),
regs.line_length_in * sizeof(u32)); regs.line_length_in * sizeof(u32));
} else { } else {
memory_manager.FlushCaching(); memory_manager.FlushCaching();
@ -126,32 +128,32 @@ void MaxwellDMA::Launch() {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
std::vector<u8> tmp_buffer(16); read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe( memory_manager.ReadBlockUnsafe(
convert_linear_2_blocklinear_addr(regs.offset_in + offset), convert_linear_2_blocklinear_addr(regs.offset_in + offset),
tmp_buffer.data(), tmp_buffer.size()); read_buffer.data(), read_buffer.size());
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
tmp_buffer.size()); read_buffer.size());
} }
} else if (is_src_pitch && !is_dst_pitch) { } else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
std::vector<u8> tmp_buffer(16); read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(),
tmp_buffer.size()); read_buffer.size());
memory_manager.WriteBlockCached( memory_manager.WriteBlockCached(
convert_linear_2_blocklinear_addr(regs.offset_out + offset), convert_linear_2_blocklinear_addr(regs.offset_out + offset),
tmp_buffer.data(), tmp_buffer.size()); read_buffer.data(), read_buffer.size());
} }
} else { } else {
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
std::vector<u8> tmp_buffer(regs.line_length_in); read_buffer.resize_destructive(regs.line_length_in);
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(),
regs.line_length_in); regs.line_length_in);
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
regs.line_length_in); regs.line_length_in);
} }
} }
@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_operand.address = regs.offset_in; src_operand.address = regs.offset_in;
DMA::BufferOperand dst_operand; DMA::BufferOperand dst_operand;
dst_operand.pitch = regs.pitch_out; u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
dst_operand.pitch = abs_pitch_out;
dst_operand.width = regs.line_length_in; dst_operand.width = regs.line_length_in;
dst_operand.height = regs.line_count; dst_operand.height = regs.line_count;
dst_operand.address = regs.offset_out; dst_operand.address = regs.offset_out;
@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const size_t src_size = const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
read_buffer.resize_destructive(src_size); read_buffer.resize_destructive(src_size);
write_buffer.resize_destructive(dst_size); write_buffer.resize_destructive(dst_size);
@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_out); abs_pitch_out);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
} }

View file

@ -4,6 +4,7 @@
#include <array> #include <array>
#include <bit> #include <bit>
#include "common/scratch_buffer.h"
#include "common/settings.h" #include "common/settings.h"
#include "video_core/host1x/codecs/h264.h" #include "video_core/host1x/codecs/h264.h"
#include "video_core/host1x/host1x.h" #include "video_core/host1x/host1x.h"
@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
} }
void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
std::vector<u8> scan(count); static Common::ScratchBuffer<u8> scan{};
scan.resize_destructive(count);
if (count == 16) { if (count == 16) {
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
} else { } else {

View file

@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
VideoCommon::CacheType which) { VideoCommon::CacheType which) {
std::vector<u8> tmp_buffer(size); tmp_buffer.resize_destructive(size);
ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
// The output block must be flushed in case it has data modified from the GPU. // The output block must be flushed in case it has data modified from the GPU.
@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
return result; return result;
} }
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32>
GPUVAddr gpu_addr, std::size_t size) const { MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const {
std::vector<std::pair<GPUVAddr, std::size_t>> result{}; boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{};
GetSubmappedRangeImpl<true>(gpu_addr, size, result); GetSubmappedRangeImpl<true>(gpu_addr, size, result);
return result; return result;
} }
@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
template <bool is_gpu_address> template <bool is_gpu_address>
void MemoryManager::GetSubmappedRangeImpl( void MemoryManager::GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size, GPUVAddr gpu_addr, std::size_t size,
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& boost::container::small_vector<
result) const { std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result)
const {
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
last_segment{}; last_segment{};
std::optional<VAddr> old_page_addr{}; std::optional<VAddr> old_page_addr{};

View file

@ -8,10 +8,12 @@
#include <mutex> #include <mutex>
#include <optional> #include <optional>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/multi_level_page_table.h" #include "common/multi_level_page_table.h"
#include "common/range_map.h" #include "common/range_map.h"
#include "common/scratch_buffer.h"
#include "common/virtual_buffer.h" #include "common/virtual_buffer.h"
#include "video_core/cache_types.h" #include "video_core/cache_types.h"
#include "video_core/pte_kind.h" #include "video_core/pte_kind.h"
@ -107,8 +109,8 @@ public:
* if the region is continuous, a single pair will be returned. If it's unmapped, an empty * if the region is continuous, a single pair will be returned. If it's unmapped, an empty
* vector will be returned; * vector will be returned;
*/ */
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
std::size_t size) const; GPUVAddr gpu_addr, std::size_t size) const;
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
@ -165,7 +167,8 @@ private:
template <bool is_gpu_address> template <bool is_gpu_address>
void GetSubmappedRangeImpl( void GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size, GPUVAddr gpu_addr, std::size_t size,
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& boost::container::small_vector<
std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>&
result) const; result) const;
Core::System& system; Core::System& system;
@ -215,8 +218,8 @@ private:
Common::VirtualBuffer<u32> big_page_table_cpu; Common::VirtualBuffer<u32> big_page_table_cpu;
std::vector<u64> big_page_continuous; std::vector<u64> big_page_continuous;
std::vector<std::pair<VAddr, std::size_t>> page_stash{}; boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{};
std::vector<std::pair<VAddr, std::size_t>> page_stash2{}; boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{};
mutable std::mutex guard; mutable std::mutex guard;
@ -226,6 +229,8 @@ private:
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
static std::atomic<size_t> unique_identifier_generator; static std::atomic<size_t> unique_identifier_generator;
Common::ScratchBuffer<u8> tmp_buffer;
}; };
} // namespace Tegra } // namespace Tegra

View file

@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
case Shader::Stage::VertexB: case Shader::Stage::VertexB:
case Shader::Stage::Geometry: case Shader::Stage::Geometry:
if (!use_assembly_shaders && key.xfb_enabled != 0) { if (!use_assembly_shaders && key.xfb_enabled != 0) {
info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
info.xfb_varyings = varyings;
info.xfb_count = count;
} }
break; break;
case Shader::Stage::TessellationEval: case Shader::Stage::TessellationEval:

View file

@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
}; };
// Measuring a popular game, this number never exceeds the specified size once data is warmed up // Measuring a popular game, this number never exceeds the specified size once data is warmed up
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {

View file

@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size; info.fixed_state_point_size = point_size;
} }
if (key.state.xfb_enabled) { if (key.state.xfb_enabled) {
info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); auto [varyings, count] =
VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
info.xfb_varyings = varyings;
info.xfb_count = count;
} }
info.convert_depth_mode = gl_ndc; info.convert_depth_mode = gl_ndc;
} }
@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size; info.fixed_state_point_size = point_size;
} }
if (key.state.xfb_enabled != 0) { if (key.state.xfb_enabled != 0) {
info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); auto [varyings, count] =
VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
info.xfb_varyings = varyings;
info.xfb_count = count;
} }
info.convert_depth_mode = gl_ndc; info.convert_depth_mode = gl_ndc;
break; break;

View file

@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}; };
} }
[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( [[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
std::vector<VkBufferCopy> result(copies.size()); boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
std::ranges::transform( std::ranges::transform(
copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
return VkBufferCopy{ return VkBufferCopy{
@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return result; return result;
} }
[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( [[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
struct Maker { struct Maker {
VkBufferImageCopy operator()(const BufferImageCopy& copy) const { VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
VkImageAspectFlags aspect_mask; VkImageAspectFlags aspect_mask;
}; };
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
std::vector<VkBufferImageCopy> result(copies.size() * 2); boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
std::ranges::transform(copies, result.begin(), std::ranges::transform(copies, result.begin(),
Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
std::ranges::transform(copies, result.begin() + copies.size(), std::ranges::transform(copies, result.begin() + copies.size(),
Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
return result; return result;
} else { } else {
std::vector<VkBufferImageCopy> result(copies.size()); boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
return result; return result;
} }
@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) { std::span<const VideoCommon::ImageCopy> copies) {
std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
const VkImageAspectFlags src_aspect_mask = src.AspectMask(); const VkImageAspectFlags src_aspect_mask = src.AspectMask();
const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
void TextureCacheRuntime::CopyImage(Image& dst, Image& src, void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) { std::span<const VideoCommon::ImageCopy> copies) {
std::vector<VkImageCopy> vk_copies(copies.size()); boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
const VkImageAspectFlags aspect_mask = dst.AspectMask(); const VkImageAspectFlags aspect_mask = dst.AspectMask();
ASSERT(aspect_mask == src.AspectMask()); ASSERT(aspect_mask == src.AspectMask());
@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
ScaleDown(true); ScaleDown(true);
} }
scheduler->RequestOutsideRenderPassOperationContext(); scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer; const VkBuffer src_buffer = buffer;
const VkImage vk_image = *original_image; const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask; const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS
if (is_rescaled) { if (is_rescaled) {
ScaleDown(); ScaleDown();
} }
boost::container::small_vector<VkBuffer, 1> buffers_vector{}; boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) { for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]); buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back( vk_copies.emplace_back(
@ -1858,7 +1859,7 @@ Framebuffer::~Framebuffer() = default;
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, bool is_rescaled) { ImageView* depth_buffer, bool is_rescaled) {
std::vector<VkImageView> attachments; boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
RenderPassKey renderpass_key{}; RenderPassKey renderpass_key{};
s32 num_layers = 1; s32 num_layers = 1;

View file

@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() {
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
marked_for_removal.end()); marked_for_removal.end());
std::vector<ShaderInfo*> removed_shaders; boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
removed_shaders.reserve(marked_for_removal.size());
std::scoped_lock lock{lookup_mutex}; std::scoped_lock lock{lookup_mutex};
for (Entry* const entry : marked_for_removal) { for (Entry* const entry : marked_for_removal) {
removed_shaders.push_back(entry->data); removed_shaders.push_back(entry->data);

View file

@ -6,6 +6,7 @@
#include <array> #include <array>
#include <optional> #include <optional>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp>
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
@ -108,8 +109,8 @@ struct ImageBase {
std::vector<ImageViewInfo> image_view_infos; std::vector<ImageViewInfo> image_view_infos;
std::vector<ImageViewId> image_view_ids; std::vector<ImageViewId> image_view_ids;
std::vector<u32> slice_offsets; boost::container::small_vector<u32, 16> slice_offsets;
std::vector<SubresourceBase> slice_subresources; boost::container::small_vector<SubresourceBase, 16> slice_subresources;
std::vector<AliasedImage> aliased_images; std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images; std::vector<ImageId> overlapping_images;

View file

@ -526,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
template <class P> template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> images; boost::container::small_vector<ImageId, 16> images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) { if (!image.IsSafeDownload()) {
return; return;
@ -579,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
template <class P> template <class P>
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> deleted_images; boost::container::small_vector<ImageId, 16> deleted_images;
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) { for (const ImageId id : deleted_images) {
Image& image = slot_images[id]; Image& image = slot_images[id];
@ -593,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
template <class P> template <class P>
void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
std::vector<ImageId> deleted_images; boost::container::small_vector<ImageId, 16> deleted_images;
ForEachImageInRegionGPU(as_id, gpu_addr, size, ForEachImageInRegionGPU(as_id, gpu_addr, size,
[&](ImageId id, Image&) { deleted_images.push_back(id); }); [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) { for (const ImageId id : deleted_images) {
@ -1101,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
const bool native_bgr = runtime.HasNativeBgr(); const bool native_bgr = runtime.HasNativeBgr();
const bool flexible_formats = True(options & RelaxedOptions::Format); const bool flexible_formats = True(options & RelaxedOptions::Format);
ImageId image_id{}; ImageId image_id{};
boost::container::small_vector<ImageId, 1> image_ids; boost::container::small_vector<ImageId, 8> image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) { if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false; return false;
@ -1622,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
} }
} }
ImageId image_id{}; ImageId image_id{};
boost::container::small_vector<ImageId, 1> image_ids; boost::container::small_vector<ImageId, 8> image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) { if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false; return false;
@ -1942,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
image.map_view_id = map_id; image.map_view_id = map_id;
return; return;
} }
std::vector<ImageViewId> sparse_maps{}; boost::container::small_vector<ImageViewId, 16> sparse_maps;
ForEachSparseSegment( ForEachSparseSegment(
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
@ -2217,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
template <class P> template <class P>
void TextureCache<P>::SynchronizeAliases(ImageId image_id) { void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
boost::container::small_vector<const AliasedImage*, 1> aliased_images; boost::container::small_vector<const AliasedImage*, 8> aliased_images;
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
bool any_modified = True(image.flags & ImageFlagBits::GpuModified); bool any_modified = True(image.flags & ImageFlagBits::GpuModified);

View file

@ -56,7 +56,7 @@ struct ImageViewInOut {
struct AsyncDecodeContext { struct AsyncDecodeContext {
ImageId image_id; ImageId image_id;
Common::ScratchBuffer<u8> decoded_data; Common::ScratchBuffer<u8> decoded_data;
std::vector<BufferImageCopy> copies; boost::container::small_vector<BufferImageCopy, 16> copies;
std::mutex mutex; std::mutex mutex;
std::atomic_bool complete; std::atomic_bool complete;
}; };
@ -429,7 +429,7 @@ private:
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
VAddr virtual_invalid_space{}; VAddr virtual_invalid_space{};

View file

@ -329,13 +329,13 @@ template <u32 GOB_EXTENT>
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); const auto slice_offsets = CalculateSliceOffsets(new_info);
const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
const auto it = std::ranges::find(slice_offsets, diff); const auto it = std::ranges::find(slice_offsets, diff);
if (it == slice_offsets.end()) { if (it == slice_offsets.end()) {
return std::nullopt; return std::nullopt;
} }
const std::vector subresources = CalculateSliceSubresources(new_info); const auto subresources = CalculateSliceSubresources(new_info);
const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
const ImageInfo& info = overlap.info; const ImageInfo& info = overlap.info;
if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
return sizes; return sizes;
} }
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D); ASSERT(info.type == ImageType::e3D);
std::vector<u32> offsets; boost::container::small_vector<u32, 16> offsets;
offsets.reserve(NumSlices(info)); offsets.reserve(NumSlices(info));
const LevelInfo level_info = MakeLevelInfo(info); const LevelInfo level_info = MakeLevelInfo(info);
@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
return offsets; return offsets;
} }
std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D); ASSERT(info.type == ImageType::e3D);
std::vector<SubresourceBase> subresources; boost::container::small_vector<SubresourceBase, 16> subresources;
subresources.reserve(NumSlices(info)); subresources.reserve(NumSlices(info));
for (s32 level = 0; level < info.resources.levels; ++level) { for (s32 level = 0; level < info.resources.levels; ++level) {
const s32 depth = AdjustMipSize(info.size.depth, level); const s32 depth = AdjustMipSize(info.size.depth, level);
@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
} }
} }
std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
SubresourceBase base, u32 up_scale, u32 down_shift) { const ImageInfo& src,
SubresourceBase base,
u32 up_scale, u32 down_shift) {
ASSERT(dst.resources.levels >= src.resources.levels); ASSERT(dst.resources.levels >= src.resources.levels);
const bool is_dst_3d = dst.type == ImageType::e3D; const bool is_dst_3d = dst.type == ImageType::e3D;
@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
ASSERT(src.resources.levels == 1); ASSERT(src.resources.levels == 1);
} }
const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
std::vector<ImageCopy> copies; boost::container::small_vector<ImageCopy, 16> copies;
copies.reserve(src.resources.levels); copies.reserve(src.resources.levels);
for (s32 level = 0; level < src.resources.levels; ++level) { for (s32 level = 0; level < src.resources.levels; ++level) {
ImageCopy& copy = copies.emplace_back(); ImageCopy& copy = copies.emplace_back();
@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies; return copies;
} }
std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
u32 down_shift) { u32 up_scale,
std::vector<ImageCopy> copies; u32 down_shift) {
boost::container::small_vector<ImageCopy, 16> copies;
copies.reserve(src.resources.levels); copies.reserve(src.resources.levels);
const bool is_3d = src.type == ImageType::e3D; const bool is_3d = src.type == ImageType::e3D;
for (s32 level = 0; level < src.resources.levels; ++level) { for (s32 level = 0; level < src.resources.levels; ++level) {
@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
} }
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
const ImageInfo& info, std::span<const u8> input, GPUVAddr gpu_addr,
std::span<u8> output) { const ImageInfo& info,
std::span<const u8> input,
std::span<u8> output) {
const size_t guest_size_bytes = input.size_bytes(); const size_t guest_size_bytes = input.size_bytes();
const u32 bpp_log2 = BytesPerBlockLog2(info.format); const u32 bpp_log2 = BytesPerBlockLog2(info.format);
const Extent3D size = info.size; const Extent3D size = info.size;
@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
info.tile_width_spacing); info.tile_width_spacing);
size_t guest_offset = 0; size_t guest_offset = 0;
u32 host_offset = 0; u32 host_offset = 0;
std::vector<BufferImageCopy> copies(num_levels); boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) { for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level); const Extent3D level_size = AdjustMipSize(size, level);
@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
} }
} }
std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
const Extent3D size = info.size; const Extent3D size = info.size;
const u32 bytes_per_block = BytesPerBlock(info.format); const u32 bytes_per_block = BytesPerBlock(info.format);
if (info.type == ImageType::Linear) { if (info.type == ImageType::Linear) {
@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
u32 host_offset = 0; u32 host_offset = 0;
std::vector<BufferImageCopy> copies(num_levels); boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) { for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level); const Extent3D level_size = AdjustMipSize(size, level);
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
return AdjustMipBlockSize(num_tiles, level_info.block, level); return AdjustMipBlockSize(num_tiles, level_info.block, level);
} }
std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
const Extent2D tile_size = DefaultBlockSize(info.format); const Extent2D tile_size = DefaultBlockSize(info.format);
if (info.type == ImageType::Linear) { if (info.type == ImageType::Linear) {
return std::vector{SwizzleParameters{ return {SwizzleParameters{
.num_tiles = AdjustTileSize(info.size, tile_size), .num_tiles = AdjustTileSize(info.size, tile_size),
.block = {}, .block = {},
.buffer_offset = 0, .buffer_offset = 0,
@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
const s32 num_levels = info.resources.levels; const s32 num_levels = info.resources.levels;
u32 guest_offset = 0; u32 guest_offset = 0;
std::vector<SwizzleParameters> params(num_levels); boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
for (s32 level = 0; level < num_levels; ++level) { for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level); const Extent3D level_size = AdjustMipSize(size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);

View file

@ -5,6 +5,7 @@
#include <optional> #include <optional>
#include <span> #include <span>
#include <boost/container/small_vector.hpp>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
@ -40,9 +41,10 @@ struct OverlapResult {
[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); [[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info);
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); [[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
const ImageInfo& info);
[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
@ -51,21 +53,18 @@ struct OverlapResult {
[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(
const ImageInfo& src, const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
SubresourceBase base, u32 up_scale = 1, u32 down_shift = 0);
u32 down_shift = 0);
[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(
u32 up_scale = 1, const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
u32 down_shift = 0);
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
GPUVAddr gpu_addr, const ImageInfo& info, Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
std::span<const u8> input, std::span<const u8> input, std::span<u8> output);
std::span<u8> output);
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
const ImageBase& image, std::span<u8> output); const ImageBase& image, std::span<u8> output);
@ -73,13 +72,15 @@ struct OverlapResult {
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies); std::span<BufferImageCopy> copies);
[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
const ImageInfo& info);
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); [[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(
const ImageInfo& info);
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
std::span<const BufferImageCopy> copies, std::span<const u8> memory, std::span<const BufferImageCopy> copies, std::span<const u8> memory,

View file

@ -13,7 +13,7 @@
namespace VideoCommon { namespace VideoCommon {
std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
const TransformFeedbackState& state) { const TransformFeedbackState& state) {
static constexpr std::array VECTORS{ static constexpr std::array VECTORS{
28U, // gl_Position 28U, // gl_Position
@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
216U, // gl_TexCoord[6] 216U, // gl_TexCoord[6]
220U, // gl_TexCoord[7] 220U, // gl_TexCoord[7]
}; };
std::vector<Shader::TransformFeedbackVarying> xfb(256); std::array<Shader::TransformFeedbackVarying, 256> xfb{};
u32 count{0};
for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
const auto& locations = state.varyings[buffer]; const auto& locations = state.varyings[buffer];
const auto& layout = state.layouts[buffer]; const auto& layout = state.layouts[buffer];
@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
} }
} }
xfb[attribute] = varying; xfb[attribute] = varying;
count = std::max(count, attribute);
highest = std::max(highest, (base_offset + varying.components) * 4); highest = std::max(highest, (base_offset + varying.components) * 4);
} }
UNIMPLEMENTED_IF(highest != layout.stride); UNIMPLEMENTED_IF(highest != layout.stride);
} }
return xfb; return {xfb, count + 1};
} }
} // namespace VideoCommon } // namespace VideoCommon

View file

@ -24,7 +24,7 @@ struct TransformFeedbackState {
varyings; varyings;
}; };
std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
const TransformFeedbackState& state); const TransformFeedbackState& state);
} // namespace VideoCommon } // namespace VideoCommon

View file

@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
std::vector<const char*> ExtensionListForVulkan( std::vector<const char*> ExtensionListForVulkan(
const std::set<std::string, std::less<>>& extensions) { const std::set<std::string, std::less<>>& extensions) {
std::vector<const char*> output; std::vector<const char*> output;
output.reserve(extensions.size());
for (const auto& extension : extensions) { for (const auto& extension : extensions) {
output.push_back(extension.c_str()); output.push_back(extension.c_str());
} }