forked from suyu/suyu
core_timing: Use higher precision sleeps on Windows
The precision of sleep_for and wait_for is limited to 1-1.5ms on Windows. Using SleepForOneTick() allows us to sleep for exactly one interval of the current timer resolution. This allows us to take advantage of systems that have a timer resolution of 0.5ms to reduce CPU overhead in the event loop.
This commit is contained in:
parent
7e353082ac
commit
bff1453282
5 changed files with 47 additions and 24 deletions
|
@ -81,4 +81,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
|
|||
|
||||
#endif
|
||||
|
||||
std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
|
||||
u64 emulated_clock_frequency) {
|
||||
return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
|
||||
}
|
||||
|
||||
} // namespace Common
|
||||
|
|
|
@ -55,4 +55,7 @@ private:
|
|||
[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
|
||||
u64 emulated_clock_frequency);
|
||||
|
||||
[[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
|
||||
u64 emulated_clock_frequency);
|
||||
|
||||
} // namespace Common
|
||||
|
|
|
@ -6,6 +6,10 @@
|
|||
#include <string>
|
||||
#include <tuple>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "common/windows/timer_resolution.h"
|
||||
#endif
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/core_timing_util.h"
|
||||
|
@ -38,7 +42,8 @@ struct CoreTiming::Event {
|
|||
};
|
||||
|
||||
CoreTiming::CoreTiming()
|
||||
: clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
|
||||
: cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)},
|
||||
event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
|
||||
|
||||
CoreTiming::~CoreTiming() {
|
||||
Reset();
|
||||
|
@ -185,15 +190,15 @@ void CoreTiming::ResetTicks() {
|
|||
}
|
||||
|
||||
u64 CoreTiming::GetCPUTicks() const {
|
||||
if (is_multicore) {
|
||||
return clock->GetCPUCycles();
|
||||
if (is_multicore) [[likely]] {
|
||||
return cpu_clock->GetCPUCycles();
|
||||
}
|
||||
return ticks;
|
||||
}
|
||||
|
||||
u64 CoreTiming::GetClockTicks() const {
|
||||
if (is_multicore) {
|
||||
return clock->GetClockCycles();
|
||||
if (is_multicore) [[likely]] {
|
||||
return cpu_clock->GetClockCycles();
|
||||
}
|
||||
return CpuCyclesToClockCycles(ticks);
|
||||
}
|
||||
|
@ -252,22 +257,21 @@ void CoreTiming::ThreadLoop() {
|
|||
const auto next_time = Advance();
|
||||
if (next_time) {
|
||||
// There are more events left in the queue, wait until the next event.
|
||||
const auto wait_time = *next_time - GetGlobalTimeNs().count();
|
||||
auto wait_time = *next_time - GetGlobalTimeNs().count();
|
||||
if (wait_time > 0) {
|
||||
#ifdef _WIN32
|
||||
// Assume a timer resolution of 1ms.
|
||||
static constexpr s64 TimerResolutionNS = 1000000;
|
||||
const auto timer_resolution_ns =
|
||||
Common::Windows::GetCurrentTimerResolution().count();
|
||||
|
||||
// Sleep in discrete intervals of the timer resolution, and spin the rest.
|
||||
const auto sleep_time = wait_time - (wait_time % TimerResolutionNS);
|
||||
if (sleep_time > 0) {
|
||||
event.WaitFor(std::chrono::nanoseconds(sleep_time));
|
||||
}
|
||||
while (!paused && !event.IsSet() && wait_time > 0) {
|
||||
wait_time = *next_time - GetGlobalTimeNs().count();
|
||||
|
||||
while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) {
|
||||
// Yield to reduce thread starvation.
|
||||
if (wait_time >= timer_resolution_ns) {
|
||||
Common::Windows::SleepForOneTick();
|
||||
} else {
|
||||
std::this_thread::yield();
|
||||
}
|
||||
}
|
||||
|
||||
if (event.IsSet()) {
|
||||
event.Reset();
|
||||
|
@ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() {
|
|||
}
|
||||
|
||||
paused_set = true;
|
||||
clock->Pause(true);
|
||||
event_clock->Pause(true);
|
||||
pause_event.Wait();
|
||||
clock->Pause(false);
|
||||
event_clock->Pause(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -303,16 +307,23 @@ void CoreTiming::Reset() {
|
|||
has_started = false;
|
||||
}
|
||||
|
||||
std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const {
|
||||
if (is_multicore) [[likely]] {
|
||||
return cpu_clock->GetTimeNS();
|
||||
}
|
||||
return CyclesToNs(ticks);
|
||||
}
|
||||
|
||||
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
|
||||
if (is_multicore) {
|
||||
return clock->GetTimeNS();
|
||||
if (is_multicore) [[likely]] {
|
||||
return event_clock->GetTimeNS();
|
||||
}
|
||||
return CyclesToNs(ticks);
|
||||
}
|
||||
|
||||
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
|
||||
if (is_multicore) {
|
||||
return clock->GetTimeUS();
|
||||
if (is_multicore) [[likely]] {
|
||||
return event_clock->GetTimeUS();
|
||||
}
|
||||
return CyclesToUs(ticks);
|
||||
}
|
||||
|
|
|
@ -122,6 +122,9 @@ public:
|
|||
/// Returns current time in emulated in Clock cycles
|
||||
u64 GetClockTicks() const;
|
||||
|
||||
/// Returns current time in nanoseconds.
|
||||
std::chrono::nanoseconds GetCPUTimeNs() const;
|
||||
|
||||
/// Returns current time in microseconds.
|
||||
std::chrono::microseconds GetGlobalTimeUs() const;
|
||||
|
||||
|
@ -139,7 +142,8 @@ private:
|
|||
|
||||
void Reset();
|
||||
|
||||
std::unique_ptr<Common::WallClock> clock;
|
||||
std::unique_ptr<Common::WallClock> cpu_clock;
|
||||
std::unique_ptr<Common::WallClock> event_clock;
|
||||
|
||||
s64 global_timer = 0;
|
||||
|
||||
|
|
|
@ -197,7 +197,7 @@ struct GPU::Impl {
|
|||
constexpr u64 gpu_ticks_num = 384;
|
||||
constexpr u64 gpu_ticks_den = 625;
|
||||
|
||||
u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
|
||||
u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
|
||||
if (Settings::values.use_fast_gpu_time.GetValue()) {
|
||||
nanoseconds /= 256;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue