Merge pull request #84 from bunnei/fix-hw-synchronization
Fix GPU/HW synchronization
This commit is contained in:
commit
76372feb19
4 changed files with 51 additions and 34 deletions
|
@ -31,7 +31,9 @@ int __cdecl main(int argc, char **argv) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while(true) {
|
||||||
Core::RunLoop();
|
Core::RunLoop();
|
||||||
|
}
|
||||||
|
|
||||||
delete emu_window;
|
delete emu_window;
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
#include "common/log.h"
|
#include "common/log.h"
|
||||||
#include "common/symbols.h"
|
#include "common/symbols.h"
|
||||||
|
|
||||||
|
#include "video_core/video_core.h"
|
||||||
|
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/mem_map.h"
|
#include "core/mem_map.h"
|
||||||
#include "core/hw/hw.h"
|
#include "core/hw/hw.h"
|
||||||
|
@ -24,29 +26,17 @@ ARM_Interface* g_app_core = nullptr; ///< ARM11 application core
|
||||||
ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core
|
ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core
|
||||||
|
|
||||||
/// Run the core CPU loop
|
/// Run the core CPU loop
|
||||||
void RunLoop() {
|
void RunLoop(int tight_loop) {
|
||||||
for (;;){
|
g_app_core->Run(tight_loop);
|
||||||
// This function loops for 100 instructions in the CPU before trying to update hardware.
|
|
||||||
// This is a little bit faster than SingleStep, and should be pretty much equivalent. The
|
|
||||||
// number of instructions chosen is fairly arbitrary, however a large number will more
|
|
||||||
// drastically affect the frequency of GSP interrupts and likely break things. The point of
|
|
||||||
// this is to just loop in the CPU for more than 1 instruction to reduce overhead and make
|
|
||||||
// it a little bit faster...
|
|
||||||
g_app_core->Run(100);
|
|
||||||
HW::Update();
|
HW::Update();
|
||||||
if (HLE::g_reschedule) {
|
if (HLE::g_reschedule) {
|
||||||
Kernel::Reschedule();
|
Kernel::Reschedule();
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Step the CPU one instruction
|
/// Step the CPU one instruction
|
||||||
void SingleStep() {
|
void SingleStep() {
|
||||||
g_app_core->Step();
|
RunLoop(1);
|
||||||
HW::Update();
|
|
||||||
if (HLE::g_reschedule) {
|
|
||||||
Kernel::Reschedule();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Halt the core
|
/// Halt the core
|
||||||
|
|
|
@ -19,8 +19,15 @@ extern ARM_Interface* g_sys_core; ///< ARM11 system (OS) core
|
||||||
/// Start the core
|
/// Start the core
|
||||||
void Start();
|
void Start();
|
||||||
|
|
||||||
/// Run the core CPU loop
|
/**
|
||||||
void RunLoop();
|
* Run the core CPU loop
|
||||||
|
* This function loops for tight_loop instructions (100 by default) in the CPU before trying to update hardware. This is a
|
||||||
|
* little bit faster than SingleStep, and should be pretty much equivalent. The number of
|
||||||
|
* instructions chosen is fairly arbitrary, however a large number will more drastically affect the
|
||||||
|
* frequency of GSP interrupts and likely break things. The point of this is to just loop in the CPU
|
||||||
|
* for more than 1 instruction to reduce overhead and make it a little bit faster...
|
||||||
|
*/
|
||||||
|
void RunLoop(int tight_loop=100);
|
||||||
|
|
||||||
/// Step the CPU one instruction
|
/// Step the CPU one instruction
|
||||||
void SingleStep();
|
void SingleStep();
|
||||||
|
|
|
@ -24,6 +24,7 @@ Regs g_regs;
|
||||||
|
|
||||||
u32 g_cur_line = 0; ///< Current vertical screen line
|
u32 g_cur_line = 0; ///< Current vertical screen line
|
||||||
u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line
|
u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line
|
||||||
|
u64 g_last_frame_ticks = 0; ///< CPU tick count from last frame
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void Read(T &var, const u32 raw_addr) {
|
inline void Read(T &var, const u32 raw_addr) {
|
||||||
|
@ -179,6 +180,25 @@ void Update() {
|
||||||
auto& framebuffer_top = g_regs.framebuffer_config[0];
|
auto& framebuffer_top = g_regs.framebuffer_config[0];
|
||||||
u64 current_ticks = Core::g_app_core->GetTicks();
|
u64 current_ticks = Core::g_app_core->GetTicks();
|
||||||
|
|
||||||
|
// Update the frame after a certain number of CPU ticks have elapsed. This assumes that the
|
||||||
|
// active frame in memory is always complete to render. There also may be issues with this
|
||||||
|
// becoming out-of-sync with GSP synchronization code (as follows). At this time, this seems to
|
||||||
|
// be the most effective solution for both homebrew and retail applications. With retail, this
|
||||||
|
// could be moved below (and probably would guarantee more accurate synchronization). However,
|
||||||
|
// primitive homebrew relies on a vertical blank interrupt to happen inevitably (regardless of a
|
||||||
|
// threading reschedule).
|
||||||
|
|
||||||
|
if ((current_ticks - g_last_frame_ticks) > GPU::kFrameTicks) {
|
||||||
|
VideoCore::g_renderer->SwapBuffers();
|
||||||
|
g_last_frame_ticks = current_ticks;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical
|
||||||
|
// blank, we need to simulate it. Based on testing, it seems that retail applications work more
|
||||||
|
// accurately when this is signalled between thread switches.
|
||||||
|
|
||||||
|
if (HLE::g_reschedule) {
|
||||||
|
|
||||||
// Synchronize line...
|
// Synchronize line...
|
||||||
if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
|
if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
|
||||||
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
|
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
|
||||||
|
@ -190,16 +210,14 @@ void Update() {
|
||||||
if (g_cur_line >= framebuffer_top.height) {
|
if (g_cur_line >= framebuffer_top.height) {
|
||||||
g_cur_line = 0;
|
g_cur_line = 0;
|
||||||
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
|
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
|
||||||
VideoCore::g_renderer->SwapBuffers();
|
}
|
||||||
Kernel::WaitCurrentThread(WAITTYPE_VBLANK);
|
|
||||||
HLE::Reschedule(__func__);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Initialize hardware
|
/// Initialize hardware
|
||||||
void Init() {
|
void Init() {
|
||||||
g_cur_line = 0;
|
g_cur_line = 0;
|
||||||
g_last_line_ticks = Core::g_app_core->GetTicks();
|
g_last_frame_ticks = g_last_line_ticks = Core::g_app_core->GetTicks();
|
||||||
|
|
||||||
auto& framebuffer_top = g_regs.framebuffer_config[0];
|
auto& framebuffer_top = g_regs.framebuffer_config[0];
|
||||||
auto& framebuffer_sub = g_regs.framebuffer_config[1];
|
auto& framebuffer_sub = g_regs.framebuffer_config[1];
|
||||||
|
|
Loading…
Reference in a new issue