diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a14641b97f..890fc6c638 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -214,7 +214,8 @@ Device::Device() has_precise_bug = TestPreciseBug(); has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && - GLAD_GL_NV_compute_program5; + GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && + GLAD_GL_NV_transform_feedback2; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 55e79aaf65..f802fd384d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -93,6 +93,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, return buffer.size; } +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", static_cast(index)); + return {GL_POSITION, 0}; +} + void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } @@ -1547,12 +1575,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); } +void RasterizerOpenGL::SyncTransformFeedback() { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. + const auto& regs = system.GPU().Maxwell3D().regs; + + static constexpr std::size_t STRIDE = 3; + std::array attribs; + std::array streams; + + GLint* cursor = attribs.data(); + GLint* current_stream = streams.data(); + + for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + + *current_stream = static_cast(feedback); + if (current_stream != streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += STRIDE; + } + } + + const GLsizei num_attribs = static_cast((cursor - attribs.data()) / STRIDE); + const GLsizei num_strides = static_cast(current_stream - streams.data()); + glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), + GL_INTERLEAVED_ATTRIBS); +} + void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { const auto& regs = system.GPU().Maxwell3D().regs; if (regs.tfb_enabled == 0) { return; } + if (device.UseAssemblyShaders()) { + SyncTransformFeedback(); + } + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); @@ -1579,6 +1665,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { static_cast(size)); } + // We may have to call BeginTransformFeedbackNV here since they seem to call different + // implementations on Nvidia's driver (the pointer is different) but we are using + // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB + // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. glBeginTransformFeedback(GL_POINTS); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f5dc56a0e2..7abc8fdbd4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -202,6 +202,10 @@ private: /// Syncs the framebuffer sRGB state to match the guest state void SyncFramebufferSRGB(); + /// Syncs transform feedback state to match guest state + /// @note Only valid on assembly shaders + void SyncTransformFeedback(); + /// Begin a transform feedback void BeginTransformFeedback(GLenum primitive_mode);