1
1
Fork 0
forked from suyu/suyu

SingleCore: Use Cycle Timing instead of Host Timing.

This commit is contained in:
Fernando Sahmkow 2020-03-28 15:23:28 -04:00
parent 9bde28d7b1
commit f5e32935ca
15 changed files with 152 additions and 80 deletions

View file

@ -26,8 +26,9 @@ using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CO
/// Generic ARMv8 CPU interface
class ARM_Interface : NonCopyable {
public:
explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers)
: system{system_}, interrupt_handlers{interrupt_handlers} {}
explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock)
: system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{
uses_wall_clock} {}
virtual ~ARM_Interface() = default;
struct ThreadContext32 {
@ -186,6 +187,7 @@ protected:
/// System context that this ARM interface is running under.
System& system;
CPUInterrupts& interrupt_handlers;
bool uses_wall_clock;
};
} // namespace Core

View file

@ -72,23 +72,35 @@ public:
}
void AddTicks(u64 ticks) override {
this->ticks -= ticks;
if (parent.uses_wall_clock) {
return;
}
// Divide the number of ticks by the number of CPU cores. TODO(Subv): This yields only a
// rough approximation of the number of executed ticks in the system; it may be thrown off
// if not all cores are doing a similar amount of work. Instead of doing this, we should
// devise a way so that timing is consistent across all cores without increasing the ticks 4
// times.
u64 amortized_ticks =
(ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
// Always execute at least one tick.
amortized_ticks = std::max<u64>(amortized_ticks, 1);
parent.system.CoreTiming().AddTicks(amortized_ticks);
num_interpreted_instructions = 0;
}
u64 GetTicksRemaining() override {
if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
return std::max<s64>(ticks, 0);
if (parent.uses_wall_clock) {
if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
return std::max<s64>(1000U, 0);
}
return 0ULL;
}
return 0ULL;
}
void ResetTicks() {
ticks = 1000LL;
return std::max(parent.system.CoreTiming().GetDowncount(), 0LL);
}
ARM_Dynarmic_32& parent;
std::size_t num_interpreted_instructions{};
s64 ticks{};
};
std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@ -103,7 +115,6 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
}
void ARM_Dynarmic_32::Run() {
cb->ResetTicks();
jit->Run();
}
@ -112,8 +123,10 @@ void ARM_Dynarmic_32::Step() {
}
ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
: ARM_Interface{system, interrupt_handlers}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
: ARM_Interface{system, interrupt_handlers, uses_wall_clock},
cb(std::make_unique<DynarmicCallbacks32>(*this)),
cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}

View file

@ -29,7 +29,7 @@ class System;
class ARM_Dynarmic_32 final : public ARM_Interface {
public:
ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
~ARM_Dynarmic_32() override;

View file

@ -124,29 +124,41 @@ public:
}
void AddTicks(u64 ticks) override {
this->ticks -= ticks;
if (parent.uses_wall_clock) {
return;
}
// Divide the number of ticks by the number of CPU cores. TODO(Subv): This yields only a
// rough approximation of the number of executed ticks in the system; it may be thrown off
// if not all cores are doing a similar amount of work. Instead of doing this, we should
// devise a way so that timing is consistent across all cores without increasing the ticks 4
// times.
u64 amortized_ticks =
(ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
// Always execute at least one tick.
amortized_ticks = std::max<u64>(amortized_ticks, 1);
parent.system.CoreTiming().AddTicks(amortized_ticks);
num_interpreted_instructions = 0;
}
u64 GetTicksRemaining() override {
if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
return std::max<s64>(ticks, 0);
if (parent.uses_wall_clock) {
if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
return std::max<s64>(1000U, 0);
}
return 0ULL;
}
return 0ULL;
return std::max(parent.system.CoreTiming().GetDowncount(), 0LL);
}
/// Returns the clock tick count reported by the parent system's CoreTiming.
u64 GetCNTPCT() override {
return parent.system.CoreTiming().GetClockTicks();
}
void ResetTicks() {
ticks = 1000LL;
}
ARM_Dynarmic_64& parent;
std::size_t num_interpreted_instructions = 0;
u64 tpidrro_el0 = 0;
u64 tpidr_el0 = 0;
s64 ticks{};
};
std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table,
@ -185,13 +197,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
}
// CNTPCT uses wall clock.
config.wall_clock_cntpct = true;
config.wall_clock_cntpct = uses_wall_clock;
return std::make_shared<Dynarmic::A64::Jit>(config);
}
void ARM_Dynarmic_64::Run() {
cb->ResetTicks();
jit->Run();
}
@ -200,9 +211,11 @@ void ARM_Dynarmic_64::Step() {
}
ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
: ARM_Interface{system, interrupt_handler},
bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
: ARM_Interface{system, interrupt_handler, uses_wall_clock},
cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handler,
uses_wall_clock,
ARM_Unicorn::Arch::AArch64,
core_index},
core_index{core_index}, exclusive_monitor{

View file

@ -28,7 +28,7 @@ class System;
class ARM_Dynarmic_64 final : public ARM_Interface {
public:
ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
~ARM_Dynarmic_64() override;

View file

@ -63,9 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
return false;
}
ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture,
std::size_t core_index)
: ARM_Interface{system, interrupt_handler}, core_index{core_index} {
ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler,
bool uses_wall_clock, Arch architecture, std::size_t core_index)
: ARM_Interface{system, interrupt_handler, uses_wall_clock}, core_index{core_index} {
const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
CHECKED(uc_open(arch, UC_MODE_ARM, &uc));

View file

@ -20,8 +20,8 @@ public:
AArch64, // 64-bit ARM
};
explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture,
std::size_t core_index);
explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler,
bool uses_wall_clock, Arch architecture, std::size_t core_index);
~ARM_Unicorn() override;
void SetPC(u64 pc) override;

View file

@ -14,6 +14,8 @@
namespace Core::Timing {
constexpr u64 MAX_SLICE_LENGTH = 4000;
/// Builds a shared EventType from a name and a timed callback.
/// Both arguments are handed to the EventType constructor via std::move (callback first,
/// then name), so the caller should not rely on `callback` afterwards.
std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
    auto new_event = std::make_shared<EventType>(std::move(callback), std::move(name));
    return new_event;
}
@ -53,6 +55,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) {
void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
on_thread_init = std::move(on_thread_init_);
event_fifo_id = 0;
ticks = 0;
const auto empty_timed_callback = [](u64, s64) {};
ev_lost = CreateEvent("_lost_event", empty_timed_callback);
if (is_multicore) {
@ -126,20 +129,36 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
basic_lock.unlock();
}
void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) {
ticks_count[core_index] += ticks;
/// Accounts for `ticks` elapsed cycles: the same amount is consumed from the downcount
/// and added to the running cycle counter.
void CoreTiming::AddTicks(u64 ticks) {
    // The two updates are independent of each other; `ticks` here is the parameter,
    // `this->ticks` the member counter.
    downcount -= ticks;
    this->ticks += ticks;
}
void CoreTiming::ResetTicks(std::size_t core_index) {
ticks_count[core_index] = 0;
void CoreTiming::Idle() {
if (!event_queue.empty()) {
u64 next_event_time = event_queue.front().time;
ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
return;
}
ticks += 1000U;
}
void CoreTiming::ResetTicks() {
downcount = MAX_SLICE_LENGTH;
}
u64 CoreTiming::GetCPUTicks() const {
return clock->GetCPUCycles();
if (is_multicore) {
return clock->GetCPUCycles();
}
return ticks;
}
u64 CoreTiming::GetClockTicks() const {
return clock->GetClockCycles();
if (is_multicore) {
return clock->GetClockCycles();
}
return CpuCyclesToClockCycles(ticks);
}
void CoreTiming::ClearPendingEvents() {
@ -217,11 +236,17 @@ void CoreTiming::ThreadLoop() {
}
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
return clock->GetTimeNS();
if (is_multicore) {
return clock->GetTimeNS();
}
return CyclesToNs(ticks);
}
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
return clock->GetTimeUS();
if (is_multicore) {
return clock->GetTimeUS();
}
return CyclesToUs(ticks);
}
} // namespace Core::Timing

View file

@ -98,9 +98,15 @@ public:
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const std::shared_ptr<EventType>& event_type);
void AddTicks(std::size_t core_index, u64 ticks);
void AddTicks(u64 ticks);
void ResetTicks(std::size_t core_index);
void ResetTicks();
void Idle();
/// Returns the current value of the downcount counter.
s64 GetDowncount() const {
return downcount;
}
/// Returns current time in emulated CPU cycles
u64 GetCPUTicks() const;
@ -154,7 +160,9 @@ private:
bool is_multicore{};
std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
/// Cycle timing
u64 ticks{};
s64 downcount{};
};
/// Creates a core timing event with the given name and callback.

View file

@ -38,15 +38,8 @@ s64 usToCycles(std::chrono::microseconds us) {
}
s64 nsToCycles(std::chrono::nanoseconds ns) {
if (static_cast<u64>(ns.count() / 1000000000) > MAX_VALUE_TO_MULTIPLY) {
LOG_ERROR(Core_Timing, "Integer overflow, use max value");
return std::numeric_limits<s64>::max();
}
if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) {
LOG_DEBUG(Core_Timing, "Time very big, do rounding");
return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000);
}
return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
}
u64 msToClockCycles(std::chrono::milliseconds ns) {
@ -69,4 +62,22 @@ u64 CpuCyclesToClockCycles(u64 ticks) {
return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
}
std::chrono::milliseconds CyclesToMs(s64 cycles) {
const u128 temporal = Common::Multiply64Into128(cycles, 1000);
u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
return std::chrono::milliseconds(ms);
}
std::chrono::nanoseconds CyclesToNs(s64 cycles) {
const u128 temporal = Common::Multiply64Into128(cycles, 1000000000);
u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
return std::chrono::nanoseconds(ns);
}
std::chrono::microseconds CyclesToUs(s64 cycles) {
const u128 temporal = Common::Multiply64Into128(cycles, 1000000);
u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
return std::chrono::microseconds(us);
}
} // namespace Core::Timing

View file

@ -16,18 +16,9 @@ s64 nsToCycles(std::chrono::nanoseconds ns);
u64 msToClockCycles(std::chrono::milliseconds ns);
u64 usToClockCycles(std::chrono::microseconds ns);
u64 nsToClockCycles(std::chrono::nanoseconds ns);
inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
}
inline std::chrono::nanoseconds CyclesToNs(s64 cycles) {
return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE);
}
inline std::chrono::microseconds CyclesToUs(s64 cycles) {
return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE);
}
std::chrono::milliseconds CyclesToMs(s64 cycles);
std::chrono::nanoseconds CyclesToNs(s64 cycles);
std::chrono::microseconds CyclesToUs(s64 cycles);
u64 CpuCyclesToClockCycles(u64 ticks);

View file

@ -232,13 +232,10 @@ void CpuManager::SingleCoreRunGuestLoop() {
auto* physical_core = &kernel.CurrentPhysicalCore();
auto& arm_interface = thread->ArmInterface();
system.EnterDynarmicProfile();
while (!physical_core->IsInterrupted()) {
if (!physical_core->IsInterrupted()) {
system.CoreTiming().ResetTicks();
arm_interface.Run();
physical_core = &kernel.CurrentPhysicalCore();
preemption_count++;
if (preemption_count % max_cycle_runs == 0) {
break;
}
}
system.ExitDynarmicProfile();
thread->SetPhantomMode(true);
@ -255,7 +252,7 @@ void CpuManager::SingleCoreRunIdleThread() {
auto& kernel = system.Kernel();
while (true) {
auto& physical_core = kernel.CurrentPhysicalCore();
PreemptSingleCore();
PreemptSingleCore(false);
idle_count++;
auto& scheduler = physical_core.Scheduler();
scheduler.TryDoContextSwitch();
@ -279,12 +276,15 @@ void CpuManager::SingleCoreRunSuspendThread() {
}
}
void CpuManager::PreemptSingleCore() {
preemption_count = 0;
void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
std::size_t old_core = current_core;
auto& scheduler = system.Kernel().Scheduler(old_core);
Kernel::Thread* current_thread = scheduler.GetCurrentThread();
if (idle_count >= 4) {
if (idle_count >= 4 || from_running_enviroment) {
if (!from_running_enviroment) {
system.CoreTiming().Idle();
idle_count = 0;
}
current_thread->SetPhantomMode(true);
system.CoreTiming().Advance();
current_thread->SetPhantomMode(false);

View file

@ -55,7 +55,7 @@ public:
std::function<void(void*)> GetSuspendThreadStartFunc();
void* GetStartFuncParamater();
void PreemptSingleCore();
void PreemptSingleCore(bool from_running_enviroment = true);
std::size_t CurrentCore() const {
return current_core.load();

View file

@ -1534,6 +1534,7 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
if (is_redundant && !system.Kernel().IsMulticore()) {
system.Kernel().ExitSVCProfile();
system.CoreTiming().AddTicks(1000U);
system.GetCpuManager().PreemptSingleCore();
system.Kernel().EnterSVCProfile();
}
@ -1762,6 +1763,10 @@ static u64 GetSystemTick(Core::System& system) {
// Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick)
const u64 result{system.CoreTiming().GetClockTicks()};
if (!system.Kernel().IsMulticore()) {
core_timing.AddTicks(400U);
}
return result;
}

View file

@ -246,19 +246,23 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
#ifdef ARCHITECTURE_x86_64
if (owner_process && !owner_process->Is64BitProcess()) {
thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id);
system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
processor_id);
} else {
thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id);
system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
processor_id);
}
#else
if (owner_process && !owner_process->Is64BitProcess()) {
thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch32, processor_id);
system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch32,
processor_id);
} else {
thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch64, processor_id);
system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch64,
processor_id);
}
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif