Core Timing: Rework Core Timing to run all cores evenly.

2019-09-09 21:37:29 -04:00 · 2019-09-09 21:37:29 -04:00 · 555866f8dc
commit 555866f8dc
parent e664c24355
6 changed files with 89 additions and 38 deletions
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@ -116,7 +116,7 @@ public:
        num_interpreted_instructions = 0;
    }
    u64 GetTicksRemaining() override {
-        return std::max(parent.system.CoreTiming().GetDowncount(), 0);
+        // Clamp at zero so a negative downcount is not reported as a huge unsigned value.
+        const s64 remaining_ticks = parent.system.CoreTiming().GetDowncount();
+        return static_cast<u64>(std::max<s64>(remaining_ticks, 0));
    }
    u64 GetCNTPCT() override {
        return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@ -156,7 +156,7 @@ void ARM_Unicorn::Run() {
    if (GDBStub::IsServerEnabled()) {
        ExecuteInstructions(std::max(4000000, 0));
    } else {
-        ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0));
+        ExecuteInstructions(std::max<s64>(system.CoreTiming().GetDowncount(), 0LL));
    }
 }

--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@ -85,24 +85,16 @@ void Cpu::RunLoop(bool tight_loop) {
    // instead advance to the next event and try to yield to the next thread
    if (Kernel::GetCurrentThread() == nullptr) {
        LOG_TRACE(Core, "Core-{} idling", core_index);
-
-        if (IsMainCore()) {
-            // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
        core_timing.Idle();
        core_timing.Advance();
-        }
-
        PrepareReschedule();
    } else {
-        if (IsMainCore()) {
-            core_timing.Advance();
-        }
-
        if (tight_loop) {
            arm_interface->Run();
        } else {
            arm_interface->Step();
        }
+        core_timing.Advance();
    }

    Reschedule();
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@ -15,7 +15,7 @@

 namespace Core::Timing {

-constexpr int MAX_SLICE_LENGTH = 20000;
+constexpr int MAX_SLICE_LENGTH = 10000;

 struct CoreTiming::Event {
    s64 time;
@ -38,10 +38,14 @@ CoreTiming::CoreTiming() = default;
 CoreTiming::~CoreTiming() = default;

 void CoreTiming::Initialize() {
-    downcount = MAX_SLICE_LENGTH;
+    for (std::size_t core = 0; core < num_cpu_cores; core++) {
+        downcounts[core] = MAX_SLICE_LENGTH;
+        time_slice[core] = MAX_SLICE_LENGTH;
+    }
    slice_length = MAX_SLICE_LENGTH;
    global_timer = 0;
    idled_cycles = 0;
+    current_context = 0;

    // The time between CoreTiming being initialized and the first call to Advance() is considered
    // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
@ -110,7 +114,7 @@ void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
 u64 CoreTiming::GetTicks() const {
    u64 ticks = static_cast<u64>(global_timer);
    if (!is_global_timer_sane) {
-        ticks += slice_length - downcount;
+        ticks += time_slice[current_context] - downcounts[current_context]; // selected core's slice minus its remaining downcount
    }
    return ticks;
 }
@ -120,7 +124,7 @@ u64 CoreTiming::GetIdleTicks() const {
 }

 void CoreTiming::AddTicks(u64 ticks) {
-    downcount -= static_cast<int>(ticks);
+    downcounts[current_context] -= static_cast<s64>(ticks); // charged to the core chosen via SwitchContext(); not clamped at zero
 }

 void CoreTiming::ClearPendingEvents() {
@ -141,22 +145,36 @@ void CoreTiming::RemoveEvent(const EventType* event_type) {

 void CoreTiming::ForceExceptionCheck(s64 cycles) {
    cycles = std::max<s64>(0, cycles);
-    if (downcount <= cycles) {
+    if (downcounts[current_context] <= cycles) {
        return;
    }

-    // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
-    // here. Account for cycles already executed by adjusting the g.slice_length
-    slice_length -= downcount - static_cast<int>(cycles);
-    downcount = static_cast<int>(cycles);
+    // Cut the active core's remaining budget down to `cycles`. The same amount is taken off
+    // its time slice so that time_slice - downcount, which GetTicks() and Advance() read as
+    // the cycles already executed, is unaffected. Both values are s64, so no narrowing cast
+    // is needed.
+    time_slice[current_context] -= downcounts[current_context] - cycles;
+    downcounts[current_context] = cycles;
+}
+
+std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const {
+    // Probe the other cores in round-robin order, starting right after the active one.
+    for (u64 step = 1; step < num_cpu_cores; ++step) {
+        const u64 candidate = (current_context + step) % num_cpu_cores;
+        const s64 slice_left = time_slice[candidate];
+        if (slice_left >= needed_ticks) {
+            return candidate;
+        }
+        // NOTE(review): slices are clamped to >= 0 in Advance(), so this branch appears to end
+        // the search at the first candidate that cannot cover needed_ticks - confirm intended.
+        if (slice_left >= 0) {
+            return std::nullopt;
+        }
+    }
+    return std::nullopt;
 }

 void CoreTiming::Advance() {
    std::unique_lock<std::mutex> guard(inner_mutex);

-    const int cycles_executed = slice_length - downcount;
+    const int cycles_executed = time_slice[current_context] - downcounts[current_context];
+    time_slice[current_context] = std::max<s64>(0, downcounts[current_context]);
    global_timer += cycles_executed;
-    slice_length = MAX_SLICE_LENGTH;

    is_global_timer_sane = true;

@ -173,24 +191,40 @@ void CoreTiming::Advance() {

    // Still events left (scheduled in the future)
    if (!event_queue.empty()) {
-        slice_length = static_cast<int>(
-            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH));
+        s64 needed_ticks = std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
+        const auto next_core = NextAvailableCore(needed_ticks);
+        if (next_core) {
+            downcounts[*next_core] = needed_ticks;
+        }
    }

-    downcount = slice_length;
+    downcounts[current_context] = time_slice[current_context];
+}
+
+void CoreTiming::ResetRun() {
+    // Hand every core a full slice again and restart the rotation at core 0.
+    downcounts.fill(MAX_SLICE_LENGTH);
+    time_slice.fill(MAX_SLICE_LENGTH);
+    current_context = 0;
+
+    // Events are still queued: core 0 gets at most the distance to the one at the front.
+    if (!event_queue.empty()) {
+        const s64 ticks_until_event = event_queue.front().time - global_timer;
+        downcounts[current_context] = std::min<s64>(ticks_until_event, MAX_SLICE_LENGTH);
+    }
 }

 void CoreTiming::Idle() {
-    idled_cycles += downcount;
-    downcount = 0;
+    // Book whatever the selected core had left as idle time, then drain its budget.
+    s64& remaining = downcounts[current_context];
+    idled_cycles += remaining;
+    remaining = 0;
 }

 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
    return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
 }

-int CoreTiming::GetDowncount() const {
-    return downcount;
+s64 CoreTiming::GetDowncount() const {
+    return downcounts[current_context]; // budget of the core chosen via SwitchContext(); AddTicks() can drive it below zero
 }

 } // namespace Core::Timing
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@ -7,6 +7,7 @@
 #include <chrono>
 #include <functional>
 #include <mutex>
+#include <optional>
 #include <string>
 #include <unordered_map>
 #include <vector>
@ -104,7 +105,19 @@ public:

    std::chrono::microseconds GetGlobalTimeUs() const;

-    int GetDowncount() const;
+    void ResetRun();
+
+    s64 GetDowncount() const;
+
+    void SwitchContext(u64 new_context) {
+        current_context = new_context; // used unchecked as an index into downcounts/time_slice (num_cpu_cores entries each)
+    }
+
+    bool CurrentContextCanRun() const {
+        return time_slice[current_context] > 0; // true while the selected core still has slice left
+    }
+
+    std::optional<u64> NextAvailableCore(const s64 needed_ticks) const;

 private:
    struct Event;
@ -112,10 +125,15 @@ private:
    /// Clear all pending events. This should ONLY be done on exit.
    void ClearPendingEvents();

+    static constexpr u64 num_cpu_cores = 4;
+
    s64 global_timer = 0;
    s64 idled_cycles = 0;
-    int slice_length = 0;
-    int downcount = 0;
+    s64 slice_length = 0;
+    std::array<s64, num_cpu_cores> downcounts{};
+    // Slice of time assigned to each core per run.
+    std::array<s64, num_cpu_cores> time_slice{};
+    u64 current_context = 0;

    // Are we in a function that has been called from Advance()
    // If events are scheduled from a function that gets called from Advance(),
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@ -6,6 +6,7 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
 #include "core/core_cpu.h"
+#include "core/core_timing.h"
 #include "core/cpu_core_manager.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/settings.h"
@ -122,13 +123,19 @@ void CpuCoreManager::RunLoop(bool tight_loop) {
        }
    }

+    auto& core_timing = system.CoreTiming();
+    core_timing.ResetRun();
+    bool keep_running{};
+    do {
+        keep_running = false;
        for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
+            core_timing.SwitchContext(active_core);
+            if (core_timing.CurrentContextCanRun()) {
                cores[active_core]->RunLoop(tight_loop);
-        if (Settings::values.use_multi_core) {
-            // Cores 1-3 are run on other threads in this mode
-            break;
            }
+            keep_running |= core_timing.CurrentContextCanRun();
        }
+    } while (keep_running);

    if (GDBStub::IsServerEnabled()) {
        GDBStub::SetCpuStepFlag(false);