From 34097906684a80f1e131653ebb13659090c5233c Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 9 Nov 2014 01:26:03 -0500 Subject: [PATCH 1/5] ARM: Fixed several dyncom bugs. - Fixed NZCVT flags to properly save state when function returns. - Fixed counter to keep track of the actual number of instructions executed. - Fixed single-step mode to only execute one instruction at a time. - DefaultIni: Removed comment that no longer applied to dyncom. --- src/citra/default_ini.h | 2 +- src/core/arm/dyncom/arm_dyncom.cpp | 7 ++-- .../arm/dyncom/arm_dyncom_interpreter.cpp | 33 +++++++++++-------- src/core/arm/dyncom/arm_dyncom_interpreter.h | 2 +- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index 7352c70c24..557da881b1 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -28,7 +28,7 @@ pad_sright = [Core] cpu_core = ## 0: Interpreter (default), 1: FastInterpreter (experimental) -gpu_refresh_rate = ## 60 (default), 1024 or 2048 may work better on the FastInterpreter +gpu_refresh_rate = ## 60 (default) [Data Storage] use_virtual_sd = diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index 669b612fc4..0b5dcccb90 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp @@ -110,9 +110,12 @@ u64 ARM_DynCom::GetTicks() const { * @param num_instructions Number of instructions to executes */ void ARM_DynCom::ExecuteInstructions(int num_instructions) { - ticks += num_instructions; state->NumInstrsToExecute = num_instructions; - InterpreterMainLoop(state.get()); + + // Dyncom only breaks on instruction dispatch. This only happens on every instruction when + // executing one instruction at a time. Otherwise, if a block is being executed, more + // instructions may actually be executed than specified. + ticks += InterpreterMainLoop(state.get()); } /** diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index fe1501b59c..f56a845183 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -3718,7 +3718,7 @@ static bool InAPrivilegedMode(arm_core_t *core) } /* r15 = r15 + 8 */ -void InterpreterMainLoop(ARMul_State* state) +unsigned InterpreterMainLoop(ARMul_State* state) { #define CRn inst_cream->crn #define OPCODE_2 inst_cream->opcode_2 @@ -3754,9 +3754,15 @@ void InterpreterMainLoop(ARMul_State* state) // GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a // clunky switch statement. #if defined __GNUC__ || defined __clang__ -#define GOTO_NEXT_INST goto *InstLabel[inst_base->idx] +#define GOTO_NEXT_INST \ + if (num_instrs >= cpu->NumInstrsToExecute) goto END; \ + num_instrs++; \ + goto *InstLabel[inst_base->idx] #else -#define GOTO_NEXT_INST switch(inst_base->idx) { \ +#define GOTO_NEXT_INST \ + if (num_instrs >= cpu->NumInstrsToExecute) goto END; \ + num_instrs++; \ + switch(inst_base->idx) { \ case 0: goto VMLA_INST; \ case 1: goto VMLS_INST; \ case 2: goto VNMLA_INST; \ @@ -4028,20 +4034,15 @@ void InterpreterMainLoop(ARMul_State* state) unsigned int addr; unsigned int phys_addr; unsigned int last_pc = 0; + unsigned int num_instrs = 0; fault_t fault; static unsigned int last_physical_base = 0, last_logical_base = 0; int ptr; + bool single_step = (cpu->NumInstrsToExecute == 1); LOAD_NZCVT; DISPATCH: { - if (cpu->NumInstrsToExecute == 0) - return; - - cpu->NumInstrsToExecute--; - - //NOTICE_LOG(ARM11, "instr!"); - if (!cpu->NirqSig) { if (!(cpu->Cpsr & 0x80)) { goto END; @@ -4393,7 +4394,8 @@ void InterpreterMainLoop(ARMul_State* state) #define CP_ACCESS_ALLOW 0 if(CP_ACCESS_ALLOW){ /* undefined instruction here */ - return; + cpu->NumInstrsToExecute = 0; + return num_instrs; } ERROR_LOG(ARM11, "CDP insn inst=0x%x, pc=0x%x\n", inst_cream->inst, cpu->Reg[15]); unsigned cpab = (cpu->CDP[inst_cream->cp_num]) (cpu, ARMul_FIRST, inst_cream->inst); @@ -6532,12 +6534,14 @@ void InterpreterMainLoop(ARMul_State* state) cpu->AbortAddr = addr; cpu->CP15[CP15(CP15_FAULT_STATUS)] = fault & 0xff; cpu->CP15[CP15(CP15_FAULT_ADDRESS)] = addr; - return; + cpu->NumInstrsToExecute = 0; + return num_instrs; } END: { SAVE_NZCVT; - return; + cpu->NumInstrsToExecute = 0; + return num_instrs; } INIT_INST_LENGTH: { @@ -6557,7 +6561,8 @@ void InterpreterMainLoop(ARMul_State* state) DEBUG_LOG(ARM11, "%llx\n", InstLabel[1]); DEBUG_LOG(ARM11, "%lld\n", (char *)InstEndLabel[1] - (char *)InstLabel[1]); #endif - return; + cpu->NumInstrsToExecute = 0; + return num_instrs; } } diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.h b/src/core/arm/dyncom/arm_dyncom_interpreter.h index d73f8f65f9..c65eb23f73 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.h +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.h @@ -4,4 +4,4 @@ #pragma once -void InterpreterMainLoop(ARMul_State* state); +unsigned InterpreterMainLoop(ARMul_State* state); From 573756e2411465f4f5830d331bd2bc5581b9e6cb Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 9 Nov 2014 01:28:33 -0500 Subject: [PATCH 2/5] ARM: Removed unnecessary goto with each instruction. --- .../arm/dyncom/arm_dyncom_interpreter.cpp | 82 +++++++++---------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index f56a845183..f899e2e8a3 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -3747,7 +3747,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) #endif #define FETCH_INST if (inst_base->br != NON_BRANCH) \ - goto PROFILING; \ + goto DISPATCH; \ inst_base = (arm_inst *)&inst_buf[ptr] #define INC_PC(l) ptr += sizeof(arm_inst) + l @@ -4180,10 +4180,6 @@ unsigned InterpreterMainLoop(ARMul_State* state) inst_base = (arm_inst *)&inst_buf[ptr]; GOTO_NEXT_INST; } - PROFILING: - { - goto DISPATCH; - } ADC_INST: { INC_ICOUNTER; @@ -4208,7 +4204,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(adc_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4242,7 +4238,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(add_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4273,7 +4269,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(and_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4291,11 +4287,11 @@ unsigned InterpreterMainLoop(ARMul_State* state) } SET_PC; INC_PC(sizeof(bbl_inst)); - goto PROFILING; + goto DISPATCH; } cpu->Reg[15] += GET_INST_SIZE(cpu); INC_PC(sizeof(bbl_inst)); - goto PROFILING; + goto DISPATCH; } BIC_INST: { @@ -4323,7 +4319,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(bic_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4359,12 +4355,12 @@ unsigned InterpreterMainLoop(ARMul_State* state) //DEBUG_MSG; } INC_PC(sizeof(blx_inst)); - goto PROFILING; + goto DISPATCH; } cpu->Reg[15] += GET_INST_SIZE(cpu); // INC_PC(sizeof(bx_inst)); INC_PC(sizeof(blx_inst)); - goto PROFILING; + goto DISPATCH; } BX_INST: { @@ -4377,12 +4373,12 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[15] = cpu->Reg[inst_cream->Rm] & 0xfffffffe; // cpu->TFlag = cpu->Reg[inst_cream->Rm] & 0x1; INC_PC(sizeof(bx_inst)); - goto PROFILING; + goto DISPATCH; } cpu->Reg[15] += GET_INST_SIZE(cpu); // INC_PC(sizeof(bx_inst)); INC_PC(sizeof(bx_inst)); - goto PROFILING; + goto DISPATCH; } BXJ_INST: CDP_INST: @@ -4524,7 +4520,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) // RD = RM; if ((inst_cream->Rd == 15)) { INC_PC(sizeof(mov_inst)); - goto PROFILING; + goto DISPATCH; } } // DEBUG_LOG(ARM11, "cpy inst %x\n", cpu->Reg[15]); @@ -4560,7 +4556,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(eor_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4719,7 +4715,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (BIT(inst, 15)) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4766,7 +4762,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->TFlag = value & 0x1; cpu->Reg[15] &= 0xFFFFFFFE; INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } //} cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4796,7 +4792,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->TFlag = value & 0x1; cpu->Reg[15] &= 0xFFFFFFFE; INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4850,7 +4846,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; if (BITS(inst_cream->inst, 12, 15) == 15) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4871,7 +4867,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; if (BITS(inst_cream->inst, 12, 15) == 15) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4928,7 +4924,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; if (BITS(inst_cream->inst, 12, 15) == 15) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4955,7 +4951,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; if (BITS(inst_cream->inst, 12, 15) == 15) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -4982,7 +4978,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; if (BITS(inst_cream->inst, 12, 15) == 15) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5008,7 +5004,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; if (BITS(inst_cream->inst, 12, 15) == 15) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5033,7 +5029,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; if (BITS(inst_cream->inst, 12, 15) == 15) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5060,7 +5056,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) if (BITS(inst_cream->inst, 12, 15) == 15) { INC_PC(sizeof(ldst_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5230,7 +5226,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(mla_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5262,7 +5258,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(mov_inst)); - goto PROFILING; + goto DISPATCH; } // return; } @@ -5424,7 +5420,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(mul_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5453,7 +5449,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(mvn_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5485,7 +5481,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(orr_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5577,7 +5573,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(rsb_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5614,7 +5610,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(rsc_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -5655,7 +5651,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(sbc_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -6068,7 +6064,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } cpu->Reg[15] += GET_INST_SIZE(cpu); //if (BITS(inst_cream->inst, 12, 15) == 15) - // goto PROFILING; + // goto DISPATCH; INC_PC(sizeof(ldst_inst)); FETCH_INST; GOTO_NEXT_INST; @@ -6177,7 +6173,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } cpu->Reg[15] += GET_INST_SIZE(cpu); //if (BITS(inst_cream->inst, 12, 15) == 15) - // goto PROFILING; + // goto DISPATCH; INC_PC(sizeof(ldst_inst)); FETCH_INST; GOTO_NEXT_INST; @@ -6227,7 +6223,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) } if (inst_cream->Rd == 15) { INC_PC(sizeof(sub_inst)); - goto PROFILING; + goto DISPATCH; } } cpu->Reg[15] += GET_INST_SIZE(cpu); @@ -6451,7 +6447,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[15] = cpu->Reg[15] + 4 + inst_cream->imm; //DEBUG_LOG(ARM11, " BL_1_THUMB: imm=0x%x, r14=0x%x, r15=0x%x\n", inst_cream->imm, cpu->Reg[14], cpu->Reg[15]); INC_PC(sizeof(b_2_thumb)); - goto PROFILING; + goto DISPATCH; } B_COND_THUMB: { @@ -6463,7 +6459,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[15] += 2; //DEBUG_LOG(ARM11, " B_COND_THUMB: imm=0x%x, r15=0x%x\n", inst_cream->imm, cpu->Reg[15]); INC_PC(sizeof(b_cond_thumb)); - goto PROFILING; + goto DISPATCH; } BL_1_THUMB: { @@ -6489,7 +6485,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->Reg[14] = tmp; //DEBUG_LOG(ARM11, " BL_2_THUMB: imm=0x%x, r14=0x%x, r15=0x%x\n", inst_cream->imm, cpu->Reg[14], cpu->Reg[15]); INC_PC(sizeof(bl_2_thumb)); - goto PROFILING; + goto DISPATCH; } BLX_1_THUMB: { @@ -6505,7 +6501,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) cpu->TFlag = 0; //DEBUG_LOG(ARM11, "In BLX_1_THUMB, BLX(1),imm=0x%x,r14=0x%x, r15=0x%x, \n", inst_cream->imm, cpu->Reg[14], cpu->Reg[15]); INC_PC(sizeof(blx_1_thumb)); - goto PROFILING; + goto DISPATCH; } UQADD16_INST: From 0fab380801b7e56936f653d849f6f5e580a924a4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 9 Nov 2014 16:56:29 -0500 Subject: [PATCH 3/5] Citra-Qt: Use Core::RunLoop when not single stepping. --- src/citra_qt/bootmanager.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index 8f3799351b..20824692d8 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -33,19 +33,16 @@ void EmuThread::run() stop_run = false; while (!stop_run) { - for (int tight_loop = 0; tight_loop < 10000; ++tight_loop) + if (cpu_running) { - if (cpu_running || exec_cpu_step) - { - if (exec_cpu_step) - exec_cpu_step = false; - - Core::SingleStep(); - if (!cpu_running) { - emit CPUStepped(); - yieldCurrentThread(); - } - } + Core::RunLoop(); + } + else if (exec_cpu_step) + { + exec_cpu_step = false; + Core::SingleStep(); + emit CPUStepped(); + yieldCurrentThread(); } } render_window->moveContext(); From ce1125d49099d0f42ccca53ba89fe3263912ae56 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 9 Nov 2014 16:56:57 -0500 Subject: [PATCH 4/5] Core: Changed RunLoop iterations to 1000 (slightly better performance). --- src/core/core.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/core.h b/src/core/core.h index 872dc0cd1d..850bb0ab42 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -26,13 +26,13 @@ void Start(); /** * Run the core CPU loop - * This function loops for 100 instructions in the CPU before trying to update hardware. This is a - * little bit faster than SingleStep, and should be pretty much equivalent. The number of - * instructions chosen is fairly arbitrary, however a large number will more drastically affect the - * frequency of GSP interrupts and likely break things. The point of this is to just loop in the CPU - * for more than 1 instruction to reduce overhead and make it a little bit faster... + * This function runs the core for the specified number of CPU instructions before trying to update + * hardware. This is much faster than SingleStep (and should be equivalent), as the CPU is not + * required to do a full dispatch with each instruction. NOTE: the number of instructions requested + * is not guaranteed to run, as this will be interrupted preemptively if a hardware update is + * requested (e.g. on a thread switch). */ -void RunLoop(int tight_loop=100); +void RunLoop(int tight_loop=1000); /// Step the CPU one instruction void SingleStep(); From b8e6f52419816a7afa6629c401c23faf8ae8ae67 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 9 Nov 2014 17:00:59 -0500 Subject: [PATCH 5/5] ARM: Fixed dyncom to use reg15 for PC (this core doesn't use pc variable). - Fixes single stepping in debugger. --- src/core/arm/dyncom/arm_dyncom.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index 0b5dcccb90..a3ed3e31e5 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp @@ -60,7 +60,7 @@ void ARM_DynCom::SetPC(u32 pc) { * @return Returns current PC */ u32 ARM_DynCom::GetPC() const { - return state->pc; + return state->Reg[15]; } /** @@ -129,7 +129,7 @@ void ARM_DynCom::SaveContext(ThreadContext& ctx) { ctx.sp = state->Reg[13]; ctx.lr = state->Reg[14]; - ctx.pc = state->pc; + ctx.pc = state->Reg[15]; ctx.cpsr = state->Cpsr; ctx.fpscr = state->VFP[1];