From 659039ca6df543f101c80858fe55a880645b773e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 7 Aug 2021 15:12:15 -0400 Subject: [PATCH 1/8] nvdec: Implement GPU accelerated decoding for all platforms Supplements the VAAPI intel gpu decoder by implementing the D3D11VA decoder for Windows, and CUVID/VDPAU for Nvidia and AMD on drivers linux respectively. --- .../command_classes/codecs/codec.cpp | 152 ++++++++++-------- src/video_core/command_classes/codecs/codec.h | 8 +- 2 files changed, 91 insertions(+), 69 deletions(-) diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index f798a00531..e4ee63e315 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -16,44 +16,17 @@ extern "C" { } namespace Tegra { -#if defined(LIBVA_FOUND) -// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license namespace { -constexpr std::array VAAPI_DRIVERS = { - "i915", - "amdgpu", -}; - -AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) { +AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { - if (*p == AV_PIX_FMT_VAAPI) { - return AV_PIX_FMT_VAAPI; + if (*p == av_codec_ctx->pix_fmt) { + return av_codec_ctx->pix_fmt; } } LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); - return *pix_fmts; -} - -bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) { - AVDictionary* hwdevice_options = nullptr; - av_dict_set(&hwdevice_options, "connection_type", "drm", 0); - for (const auto& driver : VAAPI_DRIVERS) { - av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); - const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI, - nullptr, hwdevice_options, 0); - if (hwdevice_error >= 0) { - LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); - av_dict_free(&hwdevice_options); - return true; - } - LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); - } - LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); - av_dict_free(&hwdevice_options); - return false; + return AV_PIX_FMT_NONE; } } // namespace -#endif void AVFrameDeleter(AVFrame* ptr) { av_frame_free(&ptr); @@ -69,26 +42,75 @@ Codec::~Codec() { } // Free libav memory avcodec_send_packet(av_codec_ctx, nullptr); - AVFrame* av_frame = av_frame_alloc(); - avcodec_receive_frame(av_codec_ctx, av_frame); + AVFramePtr av_frame{av_frame_alloc(), AVFrameDeleter}; + avcodec_receive_frame(av_codec_ctx, av_frame.get()); avcodec_flush_buffers(av_codec_ctx); - av_frame_free(&av_frame); avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); + av_buffer_unref(&av_gpu_decoder); } -void Codec::InitializeHwdec() { - // Prioritize integrated GPU to mitigate bandwidth bottlenecks +bool Codec::CreateGpuAvDevice() { #if defined(LIBVA_FOUND) - if (CreateVaapiHwdevice(&av_hw_device)) { - const auto hw_device_ctx = av_buffer_ref(av_hw_device); - ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); - av_codec_ctx->hw_device_ctx = hw_device_ctx; - av_codec_ctx->get_format = GetHwFormat; + static constexpr std::array VAAPI_DRIVERS = { + "i915", + "iHD", + "amdgpu", + }; + AVDictionary* hwdevice_options = nullptr; + av_dict_set(&hwdevice_options, "connection_type", "drm", 0); + for (const auto& driver : VAAPI_DRIVERS) { + av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); + const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, + nullptr, hwdevice_options, 0); + if (hwdevice_error >= 0) { + LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); + av_dict_free(&hwdevice_options); + av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; + return true; + } + LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); + } + LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); + av_dict_free(&hwdevice_options); +#endif + static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; + static constexpr std::array GPU_DECODER_TYPES{ + AV_HWDEVICE_TYPE_CUDA, +#ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, +#else + AV_HWDEVICE_TYPE_VDPAU, +#endif + }; + for (const auto& type : GPU_DECODER_TYPES) { + av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); + for (int i = 0;; i++) { + const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); + if (!config) { + LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.", + av_codec->name, av_hwdevice_get_type_name(type)); + break; + } + if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { + av_codec_ctx->pix_fmt = config->pix_fmt; + LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); + return true; + } + } + } + return false; +} + +void Codec::InitializeGpuDecoder() { + if (!CreateGpuAvDevice()) { + av_buffer_unref(&av_gpu_decoder); return; } -#endif - // TODO more GPU accelerated decoders + auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder); + ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); + av_codec_ctx->hw_device_ctx = hw_device_ctx; + av_codec_ctx->get_format = GetGpuFormat; + using_gpu_decode = true; } void Codec::Initialize() { @@ -107,7 +129,8 @@ void Codec::Initialize() { av_codec = avcodec_find_decoder(codec); av_codec_ctx = avcodec_alloc_context3(av_codec); av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - InitializeHwdec(); + + InitializeGpuDecoder(); if (!av_codec_ctx->hw_device_ctx) { LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); } @@ -115,7 +138,7 @@ void Codec::Initialize() { if (av_error < 0) { LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); + av_buffer_unref(&av_gpu_decoder); return; } initialized = true; @@ -153,38 +176,33 @@ void Codec::Decode() { if (vp9_hidden_frame) { return; } - AVFrame* hw_frame = av_frame_alloc(); - AVFrame* sw_frame = hw_frame; - ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed"); - if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) { + AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter}; + AVFramePtr final_frame{nullptr, AVFrameDeleter}; + ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed"); + if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) { LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret); - av_frame_free(&hw_frame); return; } - if (!hw_frame->width || !hw_frame->height) { + if (initial_frame->width == 0 || initial_frame->height == 0) { LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); - av_frame_free(&hw_frame); return; } -#if defined(LIBVA_FOUND) - // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license - if (hw_frame->format == AV_PIX_FMT_VAAPI) { - sw_frame = av_frame_alloc(); - ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed"); + if (using_gpu_decode) { + final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; + ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp // because Intel drivers crash unless using AV_PIX_FMT_NV12 - sw_frame->format = AV_PIX_FMT_NV12; - const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0); - ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret); - av_frame_free(&hw_frame); + final_frame->format = AV_PIX_FMT_NV12; + const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); + ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); + } else { + final_frame = std::move(initial_frame); } -#endif - if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) { - UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format); - av_frame_free(&sw_frame); + if (final_frame->format != AV_PIX_FMT_YUV420P && final_frame->format != AV_PIX_FMT_NV12) { + UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter}); + av_frames.push(std::move(final_frame)); if (av_frames.size() > 10) { LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); av_frames.pop(); diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 71936203fb..abfe592217 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -50,18 +50,22 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec [[nodiscard]] std::string_view GetCurrentCodecName() const; private: - void InitializeHwdec(); + void InitializeGpuDecoder(); + + bool CreateGpuAvDevice(); bool initialized{}; + bool using_gpu_decode{}; NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; AVCodec* av_codec{nullptr}; - AVBufferRef* av_hw_device{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; + AVBufferRef* av_gpu_decoder{nullptr}; GPU& gpu; const NvdecCommon::NvdecRegisters& state; From 0be4e402e2137c715fe4d5655f93a7b2fd9f4c27 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 7 Aug 2021 15:19:20 -0400 Subject: [PATCH 2/8] cmake: Always find LIBVA, update windows FFmpeg version Allows the use of VAAPI gpu decoders on system installed ffmpeg as well. --- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de2413843a..c98c176072 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -518,6 +518,10 @@ set(FFmpeg_COMPONENTS avutil swscale) +if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + Include(FindPkgConfig REQUIRED) + pkg_check_modules(LIBVA libva) +endif() if (NOT YUZU_USE_BUNDLED_FFMPEG) # Use system installed FFmpeg find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS}) @@ -583,10 +587,6 @@ if (YUZU_USE_BUNDLED_FFMPEG) "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}" CACHE PATH "Path to FFmpeg headers" FORCE) - if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - Include(FindPkgConfig REQUIRED) - pkg_check_modules(LIBVA libva) - endif() if(LIBVA_FOUND) pkg_check_modules(LIBDRM libdrm REQUIRED) find_package(X11 REQUIRED) @@ -670,7 +670,7 @@ if (YUZU_USE_BUNDLED_FFMPEG) endif() else() # WIN32 # Use yuzu FFmpeg binaries - set(FFmpeg_EXT_NAME "ffmpeg-4.3.1") + set(FFmpeg_EXT_NAME "ffmpeg-4.4") set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}") download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "") set(FFmpeg_FOUND YES) From 356e10898f47aec113e45962ee3480353dadf3bc Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 7 Aug 2021 15:31:14 -0400 Subject: [PATCH 3/8] codec: Replace deprecated av_init_packet usage --- .../command_classes/codecs/codec.cpp | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index e4ee63e315..0ad6162ca4 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -134,9 +134,8 @@ void Codec::Initialize() { if (!av_codec_ctx->hw_device_ctx) { LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); } - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); avcodec_close(av_codec_ctx); av_buffer_unref(&av_gpu_decoder); return; @@ -164,12 +163,17 @@ void Codec::Decode() { frame_data = vp9_decoder->ComposeFrameHeader(state); vp9_hidden_frame = vp9_decoder->WasFrameHidden(); } - AVPacket packet{}; - av_init_packet(&packet); - packet.data = frame_data.data(); - packet.size = static_cast(frame_data.size()); - if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) { - LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret); + AVPacket* packet = av_packet_alloc(); + if (!packet) { + LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); + return; + } + packet->data = frame_data.data(); + packet->size = static_cast(frame_data.size()); + const int send_pkt_ret = avcodec_send_packet(av_codec_ctx, packet); + av_packet_free(&packet); + if (send_pkt_ret != 0) { + LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", send_pkt_ret); return; } // Only receive/store visible frames From 92bc51b66ab687072488b3ff33a2a090290da49e Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 7 Aug 2021 01:02:51 -0400 Subject: [PATCH 4/8] cmake: Add VDPAU and NVDEC support to FFmpeg Adds {h264_,vp9_}{nvdec,vdpau} hwaccels. --- CMakeLists.txt | 80 ++++++++++++++++++++++++++++++++--- src/video_core/CMakeLists.txt | 1 + 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c98c176072..fa8c852ac8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -544,6 +544,9 @@ endif() if (YUZU_USE_BUNDLED_FFMPEG) if (NOT WIN32) + # TODO(lat9nq): Move this to externals/ffmpeg/CMakeLists.txt (and move externals/ffmpeg to + # externals/ffmpeg/ffmpeg) + # Build FFmpeg from externals message(STATUS "Using FFmpeg from externals") @@ -583,16 +586,23 @@ if (YUZU_USE_BUNDLED_FFMPEG) CACHE PATH "Paths to FFmpeg libraries" FORCE) endforeach() - set(FFmpeg_INCLUDE_DIR - "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}" - CACHE PATH "Path to FFmpeg headers" FORCE) + Include(FindPkgConfig REQUIRED) + pkg_check_modules(LIBVA libva) + pkg_check_modules(CUDA cuda) + pkg_check_modules(FFNVCODEC ffnvcodec) + pkg_check_modules(VDPAU vdpau) + + set(FFmpeg_HWACCEL_LIBRARIES) + set(FFmpeg_HWACCEL_FLAGS) + set(FFmpeg_HWACCEL_INCLUDE_DIRS) + set(FFmpeg_HWACCEL_LDFLAGS) if(LIBVA_FOUND) pkg_check_modules(LIBDRM libdrm REQUIRED) find_package(X11 REQUIRED) pkg_check_modules(LIBVA-DRM libva-drm REQUIRED) pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED) - set(FFmpeg_LIBVA_LIBRARIES + list(APPEND FFmpeg_HWACCEL_LIBRARIES ${LIBDRM_LIBRARIES} ${X11_LIBRARIES} ${LIBVA-DRM_LIBRARIES} @@ -602,11 +612,56 @@ if (YUZU_USE_BUNDLED_FFMPEG) --enable-hwaccel=h264_vaapi --enable-hwaccel=vp9_vaapi --enable-libdrm) + list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS + ${LIBDRM_INCLUDE_DIRS} + ${X11_INCLUDE_DIRS} + ${LIBVA-DRM_INCLUDE_DIRS} + ${LIBVA-X11_INCLUDE_DIRS} + ${LIBVA_INCLUDE_DIRS} + ) message(STATUS "VA-API found") else() set(FFmpeg_HWACCEL_FLAGS --disable-vaapi) endif() + if (FFNVCODEC_FOUND AND CUDA_FOUND) + list(APPEND FFmpeg_HWACCEL_FLAGS + --enable-cuvid + --enable-ffnvcodec + --enable-nvdec + --enable-hwaccel=h264_nvdec + --enable-hwaccel=vp9_nvdec + --extra-cflags=-I${CUDA_INCLUDE_DIRS} + ) + list(APPEND FFmpeg_HWACCEL_LIBRARIES + ${FFNVCODEC_LIBRARIES} + ${CUDA_LIBRARIES} + ) + list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS + ${FFNVCODEC_INCLUDE_DIRS} + ${CUDA_INCLUDE_DIRS} + ) + list(APPEND FFmpeg_HWACCEL_LDFLAGS + ${FFNVCODEC_LDFLAGS} + ${CUDA_LDFLAGS} + ) + message(STATUS "ffnvcodec libraries version ${FFNVCODEC_VERSION} found") + endif() + + if (VDPAU_FOUND) + list(APPEND FFmpeg_HWACCEL_FLAGS + --enable-vdpau + --enable-hwaccel=h264_vdpau + --enable-hwaccel=vp9_vdpau + ) + list(APPEND FFmpeg_HWACCEL_LIBRARIES ${VDPAU_LIBRARIES}) + list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS ${VDPAU_INCLUDE_DIRS}) + list(APPEND FFmpeg_HWACCEL_LDFLAGS ${VDPAU_LDFLAGS}) + message(STATUS "vdpau libraries version ${VDPAU_VERSION} found") + else() + list(APPEND FFmpeg_HWACCEL_FLAGS --disable-vdpau) + endif() + # `configure` parameters builds only exactly what yuzu needs from FFmpeg # `--disable-vdpau` is needed to avoid linking issues add_custom_command( @@ -624,7 +679,6 @@ if (YUZU_USE_BUNDLED_FFMPEG) --disable-network --disable-postproc --disable-swresample - --disable-vdpau --enable-decoder=h264 --enable-decoder=vp9 --cc="${CMAKE_C_COMPILER}" @@ -653,15 +707,26 @@ if (YUZU_USE_BUNDLED_FFMPEG) ${FFmpeg_BUILD_DIR} ) + set(FFmpeg_INCLUDE_DIR + "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR};${FFmpeg_HWACCEL_INCLUDE_DIRS}" + CACHE PATH "Path to FFmpeg headers" FORCE) + + set(FFmpeg_LDFLAGS + "${FFmpeg_HWACCEL_LDFLAGS}" + CACHE STRING "FFmpeg linker flags" FORCE) + # ALL makes this custom target build every time # but it won't actually build if the DEPENDS parameter is up to date add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE}) add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure) link_libraries(${FFmpeg_LIBVA_LIBRARIES}) - set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES} + set(FFmpeg_LIBRARIES ${FFmpeg_BUILD_LIBRARIES} ${FFmpeg_HWACCEL_LIBRARIES} CACHE PATH "Paths to FFmpeg libraries" FORCE) unset(FFmpeg_BUILD_LIBRARIES) - unset(FFmpeg_LIBVA_LIBRARIES) + unset(FFmpeg_HWACCEL_FLAGS) + unset(FFmpeg_HWACCEL_INCLUDE_DIRS) + unset(FFmpeg_HWACCEL_LDFLAGS) + unset(FFmpeg_HWACCEL_LIBRARIES) if (FFmpeg_FOUND) message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}") @@ -676,6 +741,7 @@ if (YUZU_USE_BUNDLED_FFMPEG) set(FFmpeg_FOUND YES) set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE) set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE) + set(FFmpeg_LDFLAGS "" CACHE STRING "FFmpeg linker flags" FORCE) set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE) set(FFmpeg_LIBRARIES ${FFmpeg_LIBRARY_DIR}/swscale.lib diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2f6cdd216d..269db21a57 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -231,6 +231,7 @@ endif() target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) +target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) From bc3efb79cc11a98005a9c036d9474fbf9cb7042f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 10 Aug 2021 22:12:45 -0400 Subject: [PATCH 5/8] codec: Fallback to CPU decoding if no compatible GPU format is found --- .../command_classes/codecs/codec.cpp | 53 +++++++++++-------- src/video_core/command_classes/codecs/codec.h | 1 - 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 0ad6162ca4..400834129e 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -17,6 +17,9 @@ extern "C" { namespace Tegra { namespace { +constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; +constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; + AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { if (*p == av_codec_ctx->pix_fmt) { @@ -24,7 +27,9 @@ AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pi } } LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); - return AV_PIX_FMT_NONE; + av_buffer_unref(&av_codec_ctx->hw_device_ctx); + av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT; + return PREFERRED_CPU_FMT; } } // namespace @@ -83,7 +88,12 @@ bool Codec::CreateGpuAvDevice() { #endif }; for (const auto& type : GPU_DECODER_TYPES) { - av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); + const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); + if (hwdevice_res < 0) { + LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", + av_hwdevice_get_type_name(type), hwdevice_res); + continue; + } for (int i = 0;; i++) { const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); if (!config) { @@ -110,36 +120,34 @@ void Codec::InitializeGpuDecoder() { ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); av_codec_ctx->hw_device_ctx = hw_device_ctx; av_codec_ctx->get_format = GetGpuFormat; - using_gpu_decode = true; } void Codec::Initialize() { - AVCodecID codec; - switch (current_codec) { - case NvdecCommon::VideoCodec::H264: - codec = AV_CODEC_ID_H264; - break; - case NvdecCommon::VideoCodec::Vp9: - codec = AV_CODEC_ID_VP9; - break; - default: - UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); - return; - } + const AVCodecID codec = [&] { + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + return AV_CODEC_ID_H264; + case NvdecCommon::VideoCodec::Vp9: + return AV_CODEC_ID_VP9; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + return AV_CODEC_ID_NONE; + } + }(); av_codec = avcodec_find_decoder(codec); av_codec_ctx = avcodec_alloc_context3(av_codec); av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); InitializeGpuDecoder(); - if (!av_codec_ctx->hw_device_ctx) { - LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); - } if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); avcodec_close(av_codec_ctx); av_buffer_unref(&av_gpu_decoder); return; } + if (!av_codec_ctx->hw_device_ctx) { + LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); + } initialized = true; } @@ -155,6 +163,9 @@ void Codec::Decode() { if (is_first_frame) { Initialize(); } + if (!initialized) { + return; + } bool vp9_hidden_frame = false; std::vector frame_data; if (current_codec == NvdecCommon::VideoCodec::H264) { @@ -191,18 +202,18 @@ void Codec::Decode() { LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); return; } - if (using_gpu_decode) { + if (av_codec_ctx->hw_device_ctx) { final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp // because Intel drivers crash unless using AV_PIX_FMT_NV12 - final_frame->format = AV_PIX_FMT_NV12; + final_frame->format = PREFERRED_GPU_FMT; const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); } else { final_frame = std::move(initial_frame); } - if (final_frame->format != AV_PIX_FMT_YUV420P && final_frame->format != AV_PIX_FMT_NV12) { + if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) { UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index abfe592217..f51ab9df02 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -60,7 +60,6 @@ private: bool CreateGpuAvDevice(); bool initialized{}; - bool using_gpu_decode{}; NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; AVCodec* av_codec{nullptr}; From a832aa699f783f6ae0a6a1468b0aa6bc7d68c5d2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 7 Aug 2021 23:57:22 -0400 Subject: [PATCH 6/8] codec: Improve libav memory alloc and cleanup --- .../command_classes/codecs/codec.cpp | 31 ++++++++++--------- src/video_core/command_classes/codecs/codec.h | 2 ++ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 400834129e..18aa40ca3c 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -20,6 +20,12 @@ namespace { constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; +void AVPacketDeleter(AVPacket* ptr) { + av_packet_free(&ptr); +} + +using AVPacketPtr = std::unique_ptr; + AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { if (*p == av_codec_ctx->pix_fmt) { @@ -46,11 +52,7 @@ Codec::~Codec() { return; } // Free libav memory - avcodec_send_packet(av_codec_ctx, nullptr); - AVFramePtr av_frame{av_frame_alloc(), AVFrameDeleter}; - avcodec_receive_frame(av_codec_ctx, av_frame.get()); - avcodec_flush_buffers(av_codec_ctx); - avcodec_close(av_codec_ctx); + avcodec_free_context(&av_codec_ctx); av_buffer_unref(&av_gpu_decoder); } @@ -111,6 +113,11 @@ bool Codec::CreateGpuAvDevice() { return false; } +void Codec::InitializeAvCodecContext() { + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); +} + void Codec::InitializeGpuDecoder() { if (!CreateGpuAvDevice()) { av_buffer_unref(&av_gpu_decoder); @@ -135,13 +142,11 @@ void Codec::Initialize() { } }(); av_codec = avcodec_find_decoder(codec); - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - + InitializeAvCodecContext(); InitializeGpuDecoder(); if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); - avcodec_close(av_codec_ctx); + avcodec_free_context(&av_codec_ctx); av_buffer_unref(&av_gpu_decoder); return; } @@ -174,17 +179,15 @@ void Codec::Decode() { frame_data = vp9_decoder->ComposeFrameHeader(state); vp9_hidden_frame = vp9_decoder->WasFrameHidden(); } - AVPacket* packet = av_packet_alloc(); + AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; if (!packet) { LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); return; } packet->data = frame_data.data(); packet->size = static_cast(frame_data.size()); - const int send_pkt_ret = avcodec_send_packet(av_codec_ctx, packet); - av_packet_free(&packet); - if (send_pkt_ret != 0) { - LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", send_pkt_ret); + if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) { + LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res); return; } // Only receive/store visible frames diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index f51ab9df02..1508d36c2b 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -55,6 +55,8 @@ public: [[nodiscard]] std::string_view GetCurrentCodecName() const; private: + void InitializeAvCodecContext(); + void InitializeGpuDecoder(); bool CreateGpuAvDevice(); From cd016d3cb5191b9f4f2756e440a6aa67e577c414 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 8 Aug 2021 16:56:40 -0400 Subject: [PATCH 7/8] configure_graphics: Add GPU nvdec decoding as an option Some system configurations may see visual regressions or lower performance using GPU decoding compared to CPU decoding. This setting provides the option for users to specify their decoding preference. Co-Authored-By: yzct12345 <87620833+yzct12345@users.noreply.github.com> --- src/common/settings.cpp | 4 +- src/common/settings.h | 8 ++- src/core/telemetry_session.cpp | 16 +++++- .../command_classes/codecs/codec.cpp | 6 ++- src/video_core/video_core.cpp | 3 +- src/yuzu/configuration/config.cpp | 7 ++- src/yuzu/configuration/config.h | 1 + src/yuzu/configuration/configure_graphics.cpp | 42 ++++++++++++--- src/yuzu/configuration/configure_graphics.h | 1 + src/yuzu/configuration/configure_graphics.ui | 51 ++++++++++++++++--- src/yuzu_cmd/config.cpp | 2 +- src/yuzu_cmd/default_ini.h | 6 +-- 12 files changed, 120 insertions(+), 27 deletions(-) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 9963159999..33665eab88 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -54,7 +54,7 @@ void LogSettings() { log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue()); log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); - log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); + log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); @@ -137,7 +137,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_disk_shader_cache.SetGlobal(true); values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); - values.use_nvdec_emulation.SetGlobal(true); + values.nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); values.shader_backend.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 1ba9b606c8..28074c1b90 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -48,6 +48,12 @@ enum class FullscreenMode : u32 { Exclusive = 1, }; +enum class NvdecEmulation : u32 { + Off = 0, + CPU = 1, + GPU = 2, +}; + /** The BasicSetting class is a simple resource manager. It defines a label and default value * alongside the actual value of the setting for simpler and less-error prone use with frontend * configurations. Setting a default value and label is required, though subclasses may deviate from @@ -466,7 +472,7 @@ struct Values { RangedSetting gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal, GPUAccuracy::Extreme, "gpu_accuracy"}; Setting use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"}; - Setting use_nvdec_emulation{true, "use_nvdec_emulation"}; + Setting nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"}; Setting accelerate_astc{true, "accelerate_astc"}; Setting use_vsync{true, "use_vsync"}; BasicRangedSetting fps_cap{1000, 1, 1000, "fps_cap"}; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 5a8cfd301b..1f16079980 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) { return "Unknown"; } +static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) { + switch (backend) { + case Settings::NvdecEmulation::Off: + return "Off"; + case Settings::NvdecEmulation::CPU: + return "CPU"; + case Settings::NvdecEmulation::GPU: + return "GPU"; + } + return "Unknown"; +} + u64 GetTelemetryId() { u64 telemetry_id{}; const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id"; @@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation.GetValue()); - AddField(field_type, "Renderer_UseNvdecEmulation", - Settings::values.use_nvdec_emulation.GetValue()); + AddField(field_type, "Renderer_NvdecEmulation", + TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue())); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_ShaderBackend", diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 18aa40ca3c..61966cbfe8 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -5,6 +5,7 @@ #include #include #include "common/assert.h" +#include "common/settings.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" #include "video_core/command_classes/codecs/vp9.h" @@ -142,8 +143,11 @@ void Codec::Initialize() { } }(); av_codec = avcodec_find_decoder(codec); + InitializeAvCodecContext(); - InitializeGpuDecoder(); + if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) { + InitializeGpuDecoder(); + } if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); avcodec_free_context(&av_codec_ctx); diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 3b575db4d8..cae543a511 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -37,7 +37,8 @@ std::unique_ptr CreateRenderer( namespace VideoCore { std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); + const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); + const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); auto gpu = std::make_unique(system, use_async, use_nvdec); auto context = emu_window.CreateSharedContext(); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 380379eb4f..6aec3c46c9 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -811,7 +811,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_disk_shader_cache); ReadGlobalSetting(Settings::values.gpu_accuracy); ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); - ReadGlobalSetting(Settings::values.use_nvdec_emulation); + ReadGlobalSetting(Settings::values.nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); ReadGlobalSetting(Settings::values.shader_backend); @@ -1348,7 +1348,10 @@ void Config::SaveRendererValues() { static_cast(Settings::values.gpu_accuracy.GetDefault()), Settings::values.gpu_accuracy.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); - WriteGlobalSetting(Settings::values.use_nvdec_emulation); + WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()), + static_cast(Settings::values.nvdec_emulation.GetValue(global)), + static_cast(Settings::values.nvdec_emulation.GetDefault()), + Settings::values.nvdec_emulation.UsingGlobal()); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index c1d7feb9f7..3346239971 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -182,5 +182,6 @@ private: Q_DECLARE_METATYPE(Settings::CPUAccuracy); Q_DECLARE_METATYPE(Settings::GPUAccuracy); Q_DECLARE_METATYPE(Settings::FullscreenMode); +Q_DECLARE_METATYPE(Settings::NvdecEmulation); Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 37e8962587..c594164be7 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -88,24 +88,30 @@ void ConfigureGraphics::SetConfiguration() { ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); - ui->use_nvdec_emulation->setEnabled(runtime_lock); + ui->nvdec_emulation_widget->setEnabled(runtime_lock); ui->accelerate_astc->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( Settings::values.use_asynchronous_gpu_emulation.GetValue()); - ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->api->setCurrentIndex(static_cast(Settings::values.renderer_backend.GetValue())); ui->fullscreen_mode_combobox->setCurrentIndex( static_cast(Settings::values.fullscreen_mode.GetValue())); + ui->nvdec_emulation->setCurrentIndex( + static_cast(Settings::values.nvdec_emulation.GetValue())); ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation, + &Settings::values.nvdec_emulation); + ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget, + !Settings::values.nvdec_emulation.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, &Settings::values.fullscreen_mode); ConfigurationShared::SetHighlight(ui->fullscreen_mode_label, @@ -137,8 +143,6 @@ void ConfigureGraphics::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, ui->use_asynchronous_gpu_emulation, use_asynchronous_gpu_emulation); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, - ui->use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc, accelerate_astc); @@ -147,6 +151,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.nvdec_emulation.UsingGlobal()) { + Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation()); + } if (Settings::values.shader_backend.UsingGlobal()) { Settings::values.shader_backend.SetValue(shader_backend); } @@ -180,6 +187,13 @@ void ConfigureGraphics::ApplyConfiguration() { } } + if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.nvdec_emulation.SetGlobal(true); + } else { + Settings::values.nvdec_emulation.SetGlobal(false); + Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation()); + } + if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.bg_red.SetGlobal(true); Settings::values.bg_green.SetGlobal(true); @@ -278,6 +292,20 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { ConfigurationShared::USE_GLOBAL_OFFSET); } +Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const { + if (Settings::IsConfiguringGlobal()) { + return static_cast(ui->nvdec_emulation->currentIndex()); + } + + if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.nvdec_emulation.SetGlobal(true); + return Settings::values.nvdec_emulation.GetValue(); + } + Settings::values.nvdec_emulation.SetGlobal(false); + return static_cast(ui->nvdec_emulation->currentIndex() - + ConfigurationShared::USE_GLOBAL_OFFSET); +} + void ConfigureGraphics::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal()); @@ -286,7 +314,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); - ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); + ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal()); ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); @@ -301,8 +329,6 @@ void ConfigureGraphics::SetupPerGameUI() { ConfigurationShared::SetColoredTristate( ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); - ConfigurationShared::SetColoredTristate( - ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc, accelerate_astc); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, @@ -316,4 +342,6 @@ void ConfigureGraphics::SetupPerGameUI() { static_cast(Settings::values.fullscreen_mode.GetValue(true))); ConfigurationShared::InsertGlobalItem( ui->api, static_cast(Settings::values.renderer_backend.GetValue(true))); + ConfigurationShared::InsertGlobalItem( + ui->nvdec_emulation, static_cast(Settings::values.nvdec_emulation.GetValue(true))); } diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index c866b911bd..7d7ac329dc 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -43,6 +43,7 @@ private: void SetupPerGameUI(); Settings::RendererBackend GetCurrentGraphicsBackend() const; + Settings::NvdecEmulation GetCurrentNvdecEmulation() const; std::unique_ptr ui; QColor bg_color; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 099ddbb7c7..41e930f6bf 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -167,13 +167,6 @@ - - - - Use NVDEC emulation - - - @@ -181,6 +174,50 @@ + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + NVDEC emulation: + + + + + + + + Disabled + + + + + CPU Decoding + + + + + GPU Decoding + + + + + + + diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 4f14be524c..464cd472eb 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -465,7 +465,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.disable_fps_limit); ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); - ReadSetting("Renderer", Settings::values.use_nvdec_emulation); + ReadSetting("Renderer", Settings::values.nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); ReadSetting("Renderer", Settings::values.use_caches_gc); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index e02eceb990..72f3213fb1 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -261,9 +261,9 @@ shader_backend = # 0 (default): Off, 1: On use_asynchronous_shaders = -# Enable NVDEC emulation. -# 0: Off, 1 (default): On -use_nvdec_emulation = +# NVDEC emulation. +# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding +nvdec_emulation = # Accelerate ASTC texture decoding. # 0: Off, 1 (default): On From b384129c63c604d8087f72a880adfdc6c68ab9a0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 10 Aug 2021 22:26:06 -0400 Subject: [PATCH 8/8] h264: Lower max_num_ref_frames GPU decoding seems to be more picky when it comes to the maximum number of reference frames. --- src/video_core/command_classes/codecs/h264.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 5fb6d45eeb..51ee14c136 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -95,7 +95,8 @@ const std::vector& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); - writer.WriteUe(16); + // TODO (ameerj): Where do we get this number, it seems to be particular for each stream + writer.WriteUe(6); // Max number of reference frames writer.WriteBit(false); writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); writer.WriteUe(pic_height - 1);