diff --git a/CMakeLists.txt b/CMakeLists.txt
index f26a0c6b82..91ec50befe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -477,8 +477,8 @@ if (APPLE)
find_library(COCOA_LIBRARY Cocoa)
set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY})
elseif (WIN32)
- # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista)
- add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600)
+ # Target Windows 10
+ add_definitions(-D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00)
set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi)
if (MINGW)
# PSAPI is the Process Status API
diff --git a/dist/yuzu.manifest b/dist/yuzu.manifest
index 10a8df9b57..f2c8639a20 100644
--- a/dist/yuzu.manifest
+++ b/dist/yuzu.manifest
@@ -36,12 +36,6 @@ SPDX-License-Identifier: GPL-2.0-or-later
-
-
-
-
-
-
+#else
+#include
+#endif
+
+#include "common/steady_clock.h"
+
+namespace Common {
+
+#ifdef _WIN32
+static s64 WindowsQueryPerformanceFrequency() {
+ LARGE_INTEGER frequency;
+ QueryPerformanceFrequency(&frequency);
+ return frequency.QuadPart;
+}
+
+static s64 WindowsQueryPerformanceCounter() {
+ LARGE_INTEGER counter;
+ QueryPerformanceCounter(&counter);
+ return counter.QuadPart;
+}
+#endif
+
+SteadyClock::time_point SteadyClock::Now() noexcept {
+#if defined(_WIN32)
+ static const auto freq = WindowsQueryPerformanceFrequency();
+ const auto counter = WindowsQueryPerformanceCounter();
+
+ // 10 MHz is a very common QPC frequency on modern PCs.
+ // Optimizing for this specific frequency can double the performance of
+ // this function by avoiding the expensive frequency conversion path.
+ static constexpr s64 TenMHz = 10'000'000;
+
+ if (freq == TenMHz) [[likely]] {
+ static_assert(period::den % TenMHz == 0);
+ static constexpr s64 Multiplier = period::den / TenMHz;
+ return time_point{duration{counter * Multiplier}};
+ }
+
+ const auto whole = (counter / freq) * period::den;
+ const auto part = (counter % freq) * period::den / freq;
+ return time_point{duration{whole + part}};
+#elif defined(__APPLE__)
+ return time_point{duration{clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW)}};
+#else
+ timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return time_point{std::chrono::seconds{ts.tv_sec} + std::chrono::nanoseconds{ts.tv_nsec}};
+#endif
+}
+
+}; // namespace Common
diff --git a/src/common/steady_clock.h b/src/common/steady_clock.h
new file mode 100644
index 0000000000..9497cf865d
--- /dev/null
+++ b/src/common/steady_clock.h
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+
+#include "common/common_types.h"
+
+namespace Common {
+
+struct SteadyClock {
+ using rep = s64;
+ using period = std::nano;
+ using duration = std::chrono::nanoseconds;
+ using time_point = std::chrono::time_point;
+
+ static constexpr bool is_steady = true;
+
+ [[nodiscard]] static time_point Now() noexcept;
+};
+
+} // namespace Common
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index ae07f28116..817e71d52f 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "common/steady_clock.h"
#include "common/uint128.h"
#include "common/wall_clock.h"
@@ -11,45 +12,32 @@
namespace Common {
-using base_timer = std::chrono::steady_clock;
-using base_time_point = std::chrono::time_point;
-
class StandardWallClock final : public WallClock {
public:
explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
- : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) {
- start_time = base_timer::now();
- }
+ : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false},
+ start_time{SteadyClock::Now()} {}
std::chrono::nanoseconds GetTimeNS() override {
- base_time_point current = base_timer::now();
- auto elapsed = current - start_time;
- return std::chrono::duration_cast(elapsed);
+ return SteadyClock::Now() - start_time;
}
std::chrono::microseconds GetTimeUS() override {
- base_time_point current = base_timer::now();
- auto elapsed = current - start_time;
- return std::chrono::duration_cast(elapsed);
+ return std::chrono::duration_cast(GetTimeNS());
}
std::chrono::milliseconds GetTimeMS() override {
- base_time_point current = base_timer::now();
- auto elapsed = current - start_time;
- return std::chrono::duration_cast(elapsed);
+ return std::chrono::duration_cast(GetTimeNS());
}
u64 GetClockCycles() override {
- std::chrono::nanoseconds time_now = GetTimeNS();
- const u128 temporary =
- Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
- return Common::Divide128On32(temporary, 1000000000).first;
+ const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency);
+ return Common::Divide128On32(temp, NS_RATIO).first;
}
u64 GetCPUCycles() override {
- std::chrono::nanoseconds time_now = GetTimeNS();
- const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
- return Common::Divide128On32(temporary, 1000000000).first;
+ const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency);
+ return Common::Divide128On32(temp, NS_RATIO).first;
}
void Pause([[maybe_unused]] bool is_paused) override {
@@ -57,7 +45,7 @@ public:
}
private:
- base_time_point start_time;
+ SteadyClock::time_point start_time;
};
#ifdef ARCHITECTURE_x86_64
@@ -93,4 +81,9 @@ std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency,
#endif
+std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency,
+ u64 emulated_clock_frequency) {
+ return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency);
+}
+
} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
index 828a523a86..157ec5eaea 100644
--- a/src/common/wall_clock.h
+++ b/src/common/wall_clock.h
@@ -55,4 +55,7 @@ private:
[[nodiscard]] std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency,
u64 emulated_clock_frequency);
+[[nodiscard]] std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency,
+ u64 emulated_clock_frequency);
+
} // namespace Common
diff --git a/src/common/windows/timer_resolution.cpp b/src/common/windows/timer_resolution.cpp
new file mode 100644
index 0000000000..29c6e5c7e1
--- /dev/null
+++ b/src/common/windows/timer_resolution.cpp
@@ -0,0 +1,109 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include
+
+#include "common/windows/timer_resolution.h"
+
+extern "C" {
+// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtQueryTimerResolution.html
+NTSYSAPI LONG NTAPI NtQueryTimerResolution(PULONG MinimumResolution, PULONG MaximumResolution,
+ PULONG CurrentResolution);
+
+// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtSetTimerResolution.html
+NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetResolution,
+ PULONG CurrentResolution);
+
+// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FNT%20Objects%2FThread%2FNtDelayExecution.html
+NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval);
+}
+
+// Defines for compatibility with older Windows 10 SDKs.
+
+#ifndef PROCESS_POWER_THROTTLING_EXECUTION_SPEED
+#define PROCESS_POWER_THROTTLING_EXECUTION_SPEED 0x1
+#endif
+#ifndef PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION
+#define PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION 0x4
+#endif
+
+namespace Common::Windows {
+
+namespace {
+
+using namespace std::chrono;
+
+constexpr nanoseconds ToNS(ULONG hundred_ns) {
+ return nanoseconds{hundred_ns * 100};
+}
+
+constexpr ULONG ToHundredNS(nanoseconds ns) {
+ return static_cast(ns.count()) / 100;
+}
+
+struct TimerResolution {
+ std::chrono::nanoseconds minimum;
+ std::chrono::nanoseconds maximum;
+ std::chrono::nanoseconds current;
+};
+
+TimerResolution GetTimerResolution() {
+ ULONG MinimumTimerResolution;
+ ULONG MaximumTimerResolution;
+ ULONG CurrentTimerResolution;
+ NtQueryTimerResolution(&MinimumTimerResolution, &MaximumTimerResolution,
+ &CurrentTimerResolution);
+ return {
+ .minimum{ToNS(MinimumTimerResolution)},
+ .maximum{ToNS(MaximumTimerResolution)},
+ .current{ToNS(CurrentTimerResolution)},
+ };
+}
+
+void SetHighQoS() {
+ // https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service
+ PROCESS_POWER_THROTTLING_STATE PowerThrottling{
+ .Version{PROCESS_POWER_THROTTLING_CURRENT_VERSION},
+ .ControlMask{PROCESS_POWER_THROTTLING_EXECUTION_SPEED |
+ PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION},
+ .StateMask{},
+ };
+ SetProcessInformation(GetCurrentProcess(), ProcessPowerThrottling, &PowerThrottling,
+ sizeof(PROCESS_POWER_THROTTLING_STATE));
+}
+
+} // Anonymous namespace
+
+nanoseconds GetMinimumTimerResolution() {
+ return GetTimerResolution().minimum;
+}
+
+nanoseconds GetMaximumTimerResolution() {
+ return GetTimerResolution().maximum;
+}
+
+nanoseconds GetCurrentTimerResolution() {
+ return GetTimerResolution().current;
+}
+
+nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) {
+ // Set the timer resolution, and return the current timer resolution.
+ const auto DesiredTimerResolution = ToHundredNS(timer_resolution);
+ ULONG CurrentTimerResolution;
+ NtSetTimerResolution(DesiredTimerResolution, TRUE, &CurrentTimerResolution);
+ return ToNS(CurrentTimerResolution);
+}
+
+nanoseconds SetCurrentTimerResolutionToMaximum() {
+ SetHighQoS();
+ return SetCurrentTimerResolution(GetMaximumTimerResolution());
+}
+
+void SleepForOneTick() {
+ LARGE_INTEGER DelayInterval{
+ .QuadPart{-1},
+ };
+ NtDelayExecution(FALSE, &DelayInterval);
+}
+
+} // namespace Common::Windows
diff --git a/src/common/windows/timer_resolution.h b/src/common/windows/timer_resolution.h
new file mode 100644
index 0000000000..e1e50a62dc
--- /dev/null
+++ b/src/common/windows/timer_resolution.h
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+
+namespace Common::Windows {
+
+/// Returns the minimum (least precise) supported timer resolution in nanoseconds.
+std::chrono::nanoseconds GetMinimumTimerResolution();
+
+/// Returns the maximum (most precise) supported timer resolution in nanoseconds.
+std::chrono::nanoseconds GetMaximumTimerResolution();
+
+/// Returns the current timer resolution in nanoseconds.
+std::chrono::nanoseconds GetCurrentTimerResolution();
+
+/**
+ * Sets the current timer resolution.
+ *
+ * @param timer_resolution Timer resolution in nanoseconds.
+ *
+ * @returns The current timer resolution.
+ */
+std::chrono::nanoseconds SetCurrentTimerResolution(std::chrono::nanoseconds timer_resolution);
+
+/**
+ * Sets the current timer resolution to the maximum supported timer resolution.
+ *
+ * @returns The current timer resolution.
+ */
+std::chrono::nanoseconds SetCurrentTimerResolutionToMaximum();
+
+/// Sleep for one tick of the current timer resolution.
+void SleepForOneTick();
+
+} // namespace Common::Windows
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 8b08332ab4..bc1a973b06 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -6,6 +6,7 @@
#include
#include "common/atomic_ops.h"
+#include "common/steady_clock.h"
#include "common/uint128.h"
#include "common/x64/native_clock.h"
@@ -39,6 +40,12 @@ static u64 FencedRDTSC() {
}
#endif
+template
+static u64 RoundToNearest(u64 value) {
+ const auto mod = value % Nearest;
+ return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
+}
+
u64 EstimateRDTSCFrequency() {
// Discard the first result measuring the rdtsc.
FencedRDTSC();
@@ -46,18 +53,18 @@ u64 EstimateRDTSCFrequency() {
FencedRDTSC();
// Get the current time.
- const auto start_time = std::chrono::steady_clock::now();
+ const auto start_time = Common::SteadyClock::Now();
const u64 tsc_start = FencedRDTSC();
- // Wait for 200 milliseconds.
- std::this_thread::sleep_for(std::chrono::milliseconds{200});
- const auto end_time = std::chrono::steady_clock::now();
+ // Wait for 250 milliseconds.
+ std::this_thread::sleep_for(std::chrono::milliseconds{250});
+ const auto end_time = Common::SteadyClock::Now();
const u64 tsc_end = FencedRDTSC();
// Calculate differences.
const u64 timer_diff = static_cast(
std::chrono::duration_cast(end_time - start_time).count());
const u64 tsc_diff = tsc_end - tsc_start;
const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
- return tsc_freq;
+ return RoundToNearest<1000>(tsc_freq);
}
namespace X64 {
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 3a63b52e3f..742cfb9967 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -6,6 +6,10 @@
#include
#include
+#ifdef _WIN32
+#include "common/windows/timer_resolution.h"
+#endif
+
#include "common/microprofile.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
@@ -38,7 +42,8 @@ struct CoreTiming::Event {
};
CoreTiming::CoreTiming()
- : clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
+ : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)},
+ event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
CoreTiming::~CoreTiming() {
Reset();
@@ -185,15 +190,15 @@ void CoreTiming::ResetTicks() {
}
u64 CoreTiming::GetCPUTicks() const {
- if (is_multicore) {
- return clock->GetCPUCycles();
+ if (is_multicore) [[likely]] {
+ return cpu_clock->GetCPUCycles();
}
return ticks;
}
u64 CoreTiming::GetClockTicks() const {
- if (is_multicore) {
- return clock->GetClockCycles();
+ if (is_multicore) [[likely]] {
+ return cpu_clock->GetClockCycles();
}
return CpuCyclesToClockCycles(ticks);
}
@@ -252,21 +257,20 @@ void CoreTiming::ThreadLoop() {
const auto next_time = Advance();
if (next_time) {
// There are more events left in the queue, wait until the next event.
- const auto wait_time = *next_time - GetGlobalTimeNs().count();
+ auto wait_time = *next_time - GetGlobalTimeNs().count();
if (wait_time > 0) {
#ifdef _WIN32
- // Assume a timer resolution of 1ms.
- static constexpr s64 TimerResolutionNS = 1000000;
+ const auto timer_resolution_ns =
+ Common::Windows::GetCurrentTimerResolution().count();
- // Sleep in discrete intervals of the timer resolution, and spin the rest.
- const auto sleep_time = wait_time - (wait_time % TimerResolutionNS);
- if (sleep_time > 0) {
- event.WaitFor(std::chrono::nanoseconds(sleep_time));
- }
+ while (!paused && !event.IsSet() && wait_time > 0) {
+ wait_time = *next_time - GetGlobalTimeNs().count();
- while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) {
- // Yield to reduce thread starvation.
- std::this_thread::yield();
+ if (wait_time >= timer_resolution_ns) {
+ Common::Windows::SleepForOneTick();
+ } else {
+ std::this_thread::yield();
+ }
}
if (event.IsSet()) {
@@ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() {
}
paused_set = true;
- clock->Pause(true);
+ event_clock->Pause(true);
pause_event.Wait();
- clock->Pause(false);
+ event_clock->Pause(false);
}
}
@@ -303,16 +307,23 @@ void CoreTiming::Reset() {
has_started = false;
}
+std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const {
+ if (is_multicore) [[likely]] {
+ return cpu_clock->GetTimeNS();
+ }
+ return CyclesToNs(ticks);
+}
+
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
- if (is_multicore) {
- return clock->GetTimeNS();
+ if (is_multicore) [[likely]] {
+ return event_clock->GetTimeNS();
}
return CyclesToNs(ticks);
}
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
- if (is_multicore) {
- return clock->GetTimeUS();
+ if (is_multicore) [[likely]] {
+ return event_clock->GetTimeUS();
}
return CyclesToUs(ticks);
}
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index da366637be..4b89c0c39b 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -122,6 +122,9 @@ public:
/// Returns current time in emulated in Clock cycles
u64 GetClockTicks() const;
+ /// Returns current time in nanoseconds.
+ std::chrono::nanoseconds GetCPUTimeNs() const;
+
/// Returns current time in microseconds.
std::chrono::microseconds GetGlobalTimeUs() const;
@@ -139,7 +142,8 @@ private:
void Reset();
- std::unique_ptr clock;
+ std::unique_ptr cpu_clock;
+ std::unique_ptr event_clock;
s64 global_timer = 0;
diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h
index 45567b8404..191c28bb46 100644
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -13,11 +13,9 @@ namespace Core {
namespace Hardware {
-// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
-// The exact value used is of course unverified.
-constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked
-constexpr u64 CNTFREQ = 19200000; // Switch's hardware clock speed
-constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores
+constexpr u64 BASE_CLOCK_RATE = 1'020'000'000; // Default CPU Frequency = 1020 MHz
+constexpr u64 CNTFREQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz
+constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores
// Virtual to Physical core map.
constexpr std::array()> VirtualToPhysicalCoreMap{
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 7024a19cf8..2e7f9c5edb 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -197,7 +197,7 @@ struct GPU::Impl {
constexpr u64 gpu_ticks_num = 384;
constexpr u64 gpu_ticks_den = 625;
- u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
+ u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
if (Settings::values.use_fast_gpu_time.GetValue()) {
nanoseconds /= 256;
}
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index f233b065ea..c092507f41 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -91,6 +91,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "common/microprofile.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
+#ifdef _WIN32
+#include "common/windows/timer_resolution.h"
+#endif
#ifdef ARCHITECTURE_x86_64
#include "common/x64/cpu_detect.h"
#endif
@@ -377,6 +380,12 @@ GMainWindow::GMainWindow(std::unique_ptr config_, bool has_broken_vulkan
LOG_INFO(Frontend, "Host RAM: {:.2f} GiB",
Common::GetMemInfo().TotalPhysicalMemory / f64{1_GiB});
LOG_INFO(Frontend, "Host Swap: {:.2f} GiB", Common::GetMemInfo().TotalSwapMemory / f64{1_GiB});
+#ifdef _WIN32
+ LOG_INFO(Frontend, "Host Timer Resolution: {:.4f} ms",
+ std::chrono::duration_cast>(
+ Common::Windows::SetCurrentTimerResolutionToMaximum())
+ .count());
+#endif
UpdateWindowTitle();
show();
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 77edd58ca4..5f39ece324 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -42,6 +42,8 @@
#include
#include
+
+#include "common/windows/timer_resolution.h"
#endif
#undef _UNICODE
@@ -314,6 +316,8 @@ int main(int argc, char** argv) {
#ifdef _WIN32
LocalFree(argv_w);
+
+ Common::Windows::SetCurrentTimerResolutionToMaximum();
#endif
MicroProfileOnThreadCreate("EmuThread");