From d743adf5186ba0b706fac6ad807f6c48d9297d48 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 4 Jul 2016 17:22:11 +0800
Subject: [PATCH] Reorganisation, Import Skyeye, This is a mess

---
 CMakeLists.txt                                |    3 +-
 src/CMakeLists.txt                            |   54 +-
 src/backend_x64/CMakeLists.txt                |   18 -
 src/backend_x64/emit_x64.cpp                  |   81 +-
 src/backend_x64/emit_x64.h                    |    6 +-
 src/backend_x64/interface_x64.cpp             |  102 +
 src/backend_x64/jitstate.h                    |    6 +-
 src/backend_x64/reg_alloc.cpp                 |   31 +-
 src/backend_x64/reg_alloc.h                   |    2 +-
 src/common/CMakeLists.txt                     |   28 -
 src/common/bit_util.h                         |    6 +-
 src/common/logging/log.h                      |    1 +
 src/{frontend_arm => frontend}/arm_types.h    |    0
 src/{frontend_arm => frontend}/decoder/arm.h  |    2 +-
 .../decoder/decoder_detail.h                  |    0
 src/frontend/decoder/thumb1.h                 |  165 +
 .../disassembler_arm.cpp}                     |    6 +-
 .../disassembler_arm.h}                       |    0
 src/{frontend_arm => frontend}/frontend_arm.h |    0
 src/{frontend_arm => frontend}/ir/ir.cpp      |    2 +-
 src/{frontend_arm => frontend}/ir/ir.h        |    4 +-
 src/{frontend_arm => frontend}/ir/opcodes.h   |    0
 src/{frontend_arm => frontend}/ir/opcodes.inc |    1 +
 src/{frontend_arm => frontend}/ir_emitter.cpp |    5 +
 src/{frontend_arm => frontend}/ir_emitter.h   |    7 +-
 src/frontend/translate_thumb.h                |   88 +
 src/frontend_arm/CMakeLists.txt               |   23 -
 src/frontend_arm/decoder/thumb1.h             |  166 -
 src/frontend_arm/translate_thumb.h            |  293 -
 src/interface/interface.h                     |   62 +-
 src/tests/CMakeLists.txt                      |   14 -
 tests/CMakeLists.txt                          |   35 +
 {src/tests => tests}/arm/fuzz_thumb.cpp       |    2 +
 .../arm/test_arm_disassembler.cpp             |    2 +-
 .../arm/test_thumb_instructions.cpp           |    8 +-
 {src/tests => tests}/main.cpp                 |    0
 .../dyncom/arm_dyncom_dec.cpp                 |  466 ++
 .../dyncom/arm_dyncom_dec.h                   |   39 +
 .../dyncom/arm_dyncom_interpreter.cpp         | 6876 +++++++++++++++++
 .../dyncom/arm_dyncom_interpreter.h           |   10 +
 .../dyncom/arm_dyncom_run.h                   |   48 +
 .../dyncom/arm_dyncom_thumb.cpp               |  393 +
 .../dyncom/arm_dyncom_thumb.h                 |   49 +
 .../skyeye_common/arm_regformat.h             |  187 +
 .../skyeye_common/armstate.cpp                |  670 ++
 .../skyeye_common/armstate.h                  |  255 +
 .../skyeye_common/armsupp.cpp                 |  207 +
 .../skyeye_common/armsupp.h                   |   32 +
 .../skyeye_common/vfp/asm_vfp.h               |   83 +
 .../skyeye_common/vfp/vfp.cpp                 |  162 +
 .../skyeye_common/vfp/vfp.h                   |   43 +
 .../skyeye_common/vfp/vfp_helper.h            |  450 ++
 .../skyeye_common/vfp/vfpdouble.cpp           | 1262 +++
 .../skyeye_common/vfp/vfpinstr.cpp            | 1788 +++++
 .../skyeye_common/vfp/vfpsingle.cpp           | 1287 +++
 55 files changed, 14939 insertions(+), 591 deletions(-)
 delete mode 100644 src/backend_x64/CMakeLists.txt
 create mode 100644 src/backend_x64/interface_x64.cpp
 delete mode 100644 src/common/CMakeLists.txt
 rename src/{frontend_arm => frontend}/arm_types.h (100%)
 rename src/{frontend_arm => frontend}/decoder/arm.h (99%)
 rename src/{frontend_arm => frontend}/decoder/decoder_detail.h (100%)
 create mode 100644 src/frontend/decoder/thumb1.h
 rename src/{frontend_arm/arm_disassembler.cpp => frontend/disassembler_arm.cpp} (99%)
 rename src/{frontend_arm/arm_disassembler.h => frontend/disassembler_arm.h} (100%)
 rename src/{frontend_arm => frontend}/frontend_arm.h (100%)
 rename src/{frontend_arm => frontend}/ir/ir.cpp (99%)
 rename src/{frontend_arm => frontend}/ir/ir.h (98%)
 rename src/{frontend_arm => frontend}/ir/opcodes.h (100%)
 rename src/{frontend_arm => frontend}/ir/opcodes.inc (95%)
 rename src/{frontend_arm => frontend}/ir_emitter.cpp (89%)
 rename src/{frontend_arm => frontend}/ir_emitter.h (86%)
 create mode 100644 src/frontend/translate_thumb.h
 delete mode 100644 src/frontend_arm/CMakeLists.txt
 delete mode 100644 src/frontend_arm/decoder/thumb1.h
 delete mode 100644 src/frontend_arm/translate_thumb.h
 delete mode 100644 src/tests/CMakeLists.txt
 create mode 100644 tests/CMakeLists.txt
 rename {src/tests => tests}/arm/fuzz_thumb.cpp (86%)
 rename {src/tests => tests}/arm/test_arm_disassembler.cpp (96%)
 rename {src/tests => tests}/arm/test_thumb_instructions.cpp (88%)
 rename {src/tests => tests}/main.cpp (100%)
 create mode 100644 tests/skyeye_interpreter/dyncom/arm_dyncom_dec.cpp
 create mode 100644 tests/skyeye_interpreter/dyncom/arm_dyncom_dec.h
 create mode 100644 tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.cpp
 create mode 100644 tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.h
 create mode 100644 tests/skyeye_interpreter/dyncom/arm_dyncom_run.h
 create mode 100644 tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.cpp
 create mode 100644 tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.h
 create mode 100644 tests/skyeye_interpreter/skyeye_common/arm_regformat.h
 create mode 100644 tests/skyeye_interpreter/skyeye_common/armstate.cpp
 create mode 100644 tests/skyeye_interpreter/skyeye_common/armstate.h
 create mode 100644 tests/skyeye_interpreter/skyeye_common/armsupp.cpp
 create mode 100644 tests/skyeye_interpreter/skyeye_common/armsupp.h
 create mode 100644 tests/skyeye_interpreter/skyeye_common/vfp/asm_vfp.h
 create mode 100644 tests/skyeye_interpreter/skyeye_common/vfp/vfp.cpp
 create mode 100644 tests/skyeye_interpreter/skyeye_common/vfp/vfp.h
 create mode 100644 tests/skyeye_interpreter/skyeye_common/vfp/vfp_helper.h
 create mode 100644 tests/skyeye_interpreter/skyeye_common/vfp/vfpdouble.cpp
 create mode 100644 tests/skyeye_interpreter/skyeye_common/vfp/vfpinstr.cpp
 create mode 100644 tests/skyeye_interpreter/skyeye_common/vfp/vfpsingle.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 49ae7f9f..97043c98 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ project(dynarmic)
 option(DYNARMIC_USE_SYSTEM_BOOST "Use the system boost libraries" ON)
 
 # Compiler flags
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -Wall -Werror -Wextra -pedantic -Wfatal-errors -Wno-unused-parameter -static-libgcc -static-libstdc++")
+add_compile_options(--std=c++14 -Wall -Werror -Wextra -pedantic -Wfatal-errors -Wno-unused-parameter -static-libgcc -static-libstdc++)
 
 # Arch detection
 include(CheckSymbolExists)
@@ -56,3 +56,4 @@ enable_testing(true)  # Enables unit-testing.
 
 # Dynarmic project files
 add_subdirectory(src)
+add_subdirectory(tests)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f181ff55..370811a6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,6 +1,52 @@
 include_directories(.)
 
-add_subdirectory(backend_x64)
-add_subdirectory(common)
-add_subdirectory(frontend_arm)
-add_subdirectory(tests)
+set(SRCS
+    backend_x64/emit_x64.cpp
+    backend_x64/interface_x64.cpp
+    backend_x64/reg_alloc.cpp
+    backend_x64/routines.cpp
+    common/logging/log.cpp
+    common/memory_util.cpp
+    common/string_util.cpp
+    common/x64/abi.cpp
+    common/x64/cpu_detect.cpp
+    common/x64/emitter.cpp
+    frontend/disassembler_arm.cpp
+    frontend/ir/ir.cpp
+    frontend/ir_emitter.cpp
+    )
+
+set(HEADERS
+    backend_x64/emit_x64.h
+    backend_x64/jitstate.h
+    backend_x64/reg_alloc.h
+    backend_x64/routines.h
+    common/assert.h
+    common/bit_set.h
+    common/bit_util.h
+    common/code_block.h
+    common/common_types.h
+    common/logging/log.h
+    common/memory_util.h
+    common/mp.h
+    common/scope_exit.h
+    common/string_util.h
+    common/x64/abi.h
+    common/x64/cpu_detect.h
+    common/x64/emitter.h
+    frontend/arm_types.h
+    frontend/decoder/arm.h
+    frontend/decoder/decoder_detail.h
+    frontend/decoder/thumb1.h
+    frontend/disassembler_arm.h
+    frontend/frontend_arm.h
+    frontend/ir/ir.h
+    frontend/ir/opcodes.h
+    frontend/ir_emitter.h
+    frontend/translate_thumb.h
+    interface/interface.h
+    )
+
+source_group(dynarmic FILES ${SRCS} ${HEADERS})
+add_library(dynarmic STATIC ${SRCS} ${HEADERS})
+set_target_properties(dynarmic PROPERTIES LINKER_LANGUAGE CXX)
diff --git a/src/backend_x64/CMakeLists.txt b/src/backend_x64/CMakeLists.txt
deleted file mode 100644
index 850e791e..00000000
--- a/src/backend_x64/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-set(SRCS
-    emit_x64.cpp
-    reg_alloc.cpp
-    routines.cpp
-    )
-
-set(HEADERS
-    ../interface/interface.h
-    emit_x64.h
-    jitstate.h
-    reg_alloc.h
-    routines.h
-    )
-
-source_group(frontend_x64 FILES ${SRCS} ${HEADERS})
-add_library(dynarmic_backend_x64 STATIC ${SRCS} ${HEADERS})
-target_link_libraries(dynarmic_backend_x64 dynarmic_common)
-set_target_properties(dynarmic_backend_x64 PROPERTIES LINKER_LANGUAGE CXX)
diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index da7863e8..bd40099f 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -22,7 +22,7 @@ namespace BackendX64 {
 // Mapping from opcode to Emit* member function.
 const static std::map<IR::Opcode, void (EmitX64::*)(IR::Value*)> emit_fns {
 #define OPCODE(name, type, ...) { IR::Opcode::name, &EmitX64::Emit##name },
-#include "frontend_arm/ir/opcodes.inc"
+#include "frontend/ir/opcodes.inc"
 #undef OPCODE
 };
 
@@ -33,7 +33,7 @@ static IR::Inst* FindUseWithOpcode(IR::Inst* inst, IR::Opcode opcode) {
     return iter == uses.end() ? nullptr : reinterpret_cast<IR::Inst*>(iter->get());
 }
 
-CodePtr EmitX64::Emit(Dynarmic::IR::Block block) {
+CodePtr EmitX64::Emit(Arm::LocationDescriptor descriptor, Dynarmic::IR::Block block) {
     code->INT3();
     CodePtr code_ptr = code->GetCodePtr();
 
@@ -98,6 +98,8 @@ void EmitX64::EmitGetNFlag(IR::Value* value_) {
 
     X64Reg result = reg_alloc.DefRegister(value);
 
+    // TODO: Flag optimization
+
     code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Cpsr)));
     code->SHR(32, R(result), Imm8(31));
 }
@@ -107,6 +109,8 @@ void EmitX64::EmitSetNFlag(IR::Value* value_) {
 
     X64Reg to_store = reg_alloc.UseRegister(value->GetArg(0).get());
 
+    // TODO: Flag optimization
+
     code->SHL(32, R(to_store), Imm8(31));
     code->AND(32, MDisp(R15, offsetof(JitState, Cpsr)), Imm32(~static_cast<u32>(1 << 31)));
     code->OR(32, MDisp(R15, offsetof(JitState, Cpsr)), R(to_store));
@@ -117,6 +121,8 @@ void EmitX64::EmitGetZFlag(IR::Value* value_) {
 
     X64Reg result = reg_alloc.DefRegister(value);
 
+    // TODO: Flag optimization
+
     code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Cpsr)));
     code->SHR(32, R(result), Imm8(30));
     code->AND(32, R(result), Imm32(1));
@@ -127,6 +133,8 @@ void EmitX64::EmitSetZFlag(IR::Value* value_) {
 
     X64Reg to_store = reg_alloc.UseRegister(value->GetArg(0).get());
 
+    // TODO: Flag optimization
+
     code->SHL(32, R(to_store), Imm8(30));
     code->AND(32, MDisp(R15, offsetof(JitState, Cpsr)), Imm32(~static_cast<u32>(1 << 30)));
     code->OR(32, MDisp(R15, offsetof(JitState, Cpsr)), R(to_store));
@@ -137,6 +145,8 @@ void EmitX64::EmitGetCFlag(IR::Value* value_) {
 
     X64Reg result = reg_alloc.DefRegister(value);
 
+    // TODO: Flag optimization
+
     code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Cpsr)));
     code->SHR(32, R(result), Imm8(29));
     code->AND(32, R(result), Imm32(1));
@@ -147,6 +157,8 @@ void EmitX64::EmitSetCFlag(IR::Value* value_) {
 
     X64Reg to_store = reg_alloc.UseRegister(value->GetArg(0).get());
 
+    // TODO: Flag optimization
+
     code->SHL(32, R(to_store), Imm8(29));
     code->AND(32, MDisp(R15, offsetof(JitState, Cpsr)), Imm32(~static_cast<u32>(1 << 29)));
     code->OR(32, MDisp(R15, offsetof(JitState, Cpsr)), R(to_store));
@@ -157,6 +169,8 @@ void EmitX64::EmitGetVFlag(IR::Value* value_) {
 
     X64Reg result = reg_alloc.DefRegister(value);
 
+    // TODO: Flag optimization
+
     code->MOV(32, R(result), MDisp(R15, offsetof(JitState, Cpsr)));
     code->SHR(32, R(result), Imm8(28));
     code->AND(32, R(result), Imm32(1));
@@ -167,6 +181,8 @@ void EmitX64::EmitSetVFlag(IR::Value* value_) {
 
     X64Reg to_store = reg_alloc.UseRegister(value->GetArg(0).get());
 
+    // TODO: Flag optimization
+
     code->SHL(32, R(to_store), Imm8(28));
     code->AND(32, MDisp(R15, offsetof(JitState, Cpsr)), Imm32(~static_cast<u32>(1 << 28)));
     code->OR(32, MDisp(R15, offsetof(JitState, Cpsr)), R(to_store));
@@ -179,6 +195,8 @@ void EmitX64::EmitGetCarryFromOp(IR::Value*) {
 void EmitX64::EmitLeastSignificantByte(IR::Value* value_) {
     auto value = reinterpret_cast<IR::Inst*>(value_);
 
+    // TODO: Flag optimization
+
     reg_alloc.UseDefRegister(value->GetArg(0).get(), value);
 }
 
@@ -187,6 +205,8 @@ void EmitX64::EmitMostSignificantBit(IR::Value* value_) {
 
     X64Reg result = reg_alloc.UseDefRegister(value->GetArg(0).get(), value);
 
+    // TODO: Flag optimization
+
     code->SHL(32, R(result), Imm8(31));
 }
 
@@ -195,6 +215,8 @@ void EmitX64::EmitIsZero(IR::Value* value_) {
 
     X64Reg result = reg_alloc.UseDefRegister(value->GetArg(0).get(), value);
 
+    // TODO: Flag optimization
+
     code->TEST(32, R(result), R(result));
     code->SETcc(CCFlags::CC_E, R(result));
     code->MOVZX(32, 8, result, R(result));
@@ -303,7 +325,62 @@ void EmitX64::EmitLogicalShiftRight(IR::Value* value_) {
     }
 }
 
+void EmitX64::EmitArithmeticShiftRight(IR::Value* value_) { // Emits x64 code for an arithmetic shift right by a register-held amount, optionally producing a carry-out.
+    auto value = reinterpret_cast<IR::Inst*>(value_);
+    auto carry_inst = FindUseWithOpcode(value, IR::Opcode::GetCarryFromOp); // Non-null when some use of this value is a GetCarryFromOp.
+
+    if (!carry_inst) {
+        X64Reg shift = reg_alloc.UseRegister(value->GetArg(1).get(), {HostLoc::RCX});
+        X64Reg result = reg_alloc.UseDefRegister(value->GetArg(0).get(), value);
+        // Arg 0 is the value being shifted (shifted in place); arg 1 is the shift amount, pinned to RCX.
+
+        // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift.
+        // ARM differs from the behaviour: It does not mask the count, so shifts above 31 fill the result with copies of the sign bit.
+
+        // TODO: Optimize this.
+
+        code->CMP(8, R(shift), Imm8(31)); // 8-bit compare: only the low byte of the shift amount is examined.
+        auto Rs_gt31 = code->J_CC(CC_A); // Unsigned "above": taken when the low byte is greater than 31.
+        // if (Rs & 0xFF <= 31) {
+        code->SAR(32, R(result), R(shift));
+        auto jmp_to_end = code->J();
+        // } else {
+        code->SetJumpTarget(Rs_gt31);
+        code->SAR(32, R(result), Imm8(31)); // Verified. Leaves every bit of result equal to the original sign bit.
+        // }
+        code->SetJumpTarget(jmp_to_end);
+    } else {
+        inhibit_emission.insert(carry_inst); // The carry is produced below, so carry_inst is added to the inhibit_emission set.
+
+        X64Reg shift = reg_alloc.UseRegister(value->GetArg(1).get(), {HostLoc::RCX});
+        X64Reg result = reg_alloc.UseDefRegister(value->GetArg(0).get(), value);
+        X64Reg carry = reg_alloc.UseDefRegister(value->GetArg(2).get(), carry_inst); // Starts as the carry-in (arg 2) and becomes the carry-out.
+
+        // TODO: Optimize this.
+
+        code->CMP(8, R(shift), Imm8(31)); // 8-bit compare: only the low byte of the shift amount is examined.
+        auto Rs_gt31 = code->J_CC(CC_A); // Unsigned "above": taken when the low byte is greater than 31.
+        // if (Rs & 0xFF == 0) goto end;
+        code->TEST(8, R(shift), R(shift));
+        auto Rs_zero = code->J_CC(CC_Z);
+        // if (Rs & 0xFF <= 31) {
+        code->SAR(32, R(result), R(CL));
+        code->SETcc(CC_C, R(carry)); // After SAR, x64 CF holds the last bit shifted out.
+        auto jmp_to_end = code->J();
+        // } else if (Rs & 0xFF > 31) {
+        code->SetJumpTarget(Rs_gt31);
+        code->SAR(32, R(result), Imm8(31)); // Verified. Leaves every bit of result equal to the original sign bit.
+        code->BT(32, R(result), Imm8(31)); // CF = bit 31 of result, i.e. the original sign bit.
+        code->SETcc(CC_C, R(carry));
+        // }
+        code->SetJumpTarget(jmp_to_end);
+        code->SetJumpTarget(Rs_zero); // A shift amount of zero skips everything: result and carry are left unchanged.
+    }
+}
+
 void EmitX64::EmitReturnToDispatch() {
+    // TODO: Update cycle counts
+
     code->JMP(routines->RunCodeReturnAddress(), true);
 }
 
diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h
index 25f71867..6b5f7c7c 100644
--- a/src/backend_x64/emit_x64.h
+++ b/src/backend_x64/emit_x64.h
@@ -11,7 +11,7 @@
 #include "backend_x64/reg_alloc.h"
 #include "backend_x64/routines.h"
 #include "common/x64/emitter.h"
-#include "frontend_arm/ir/ir.h"
+#include "frontend/ir/ir.h"
 #include "interface/interface.h"
 
 namespace Dynarmic {
@@ -21,7 +21,8 @@ class EmitX64 final {
 public:
     EmitX64(Gen::XEmitter* code, Routines* routines, UserCallbacks cb) : code(code), reg_alloc(code), routines(routines), cb(cb) {}
 
-    CodePtr Emit(IR::Block ir);
+    CodePtr Emit(Arm::LocationDescriptor descriptor, IR::Block ir);
+    CodePtr GetBasicBlock(Arm::LocationDescriptor descriptor);
 
     void EmitImmU1(IR::Value* value);
     void EmitImmU8(IR::Value* value);
@@ -43,6 +44,7 @@ public:
     void EmitIsZero(IR::Value* value);
     void EmitLogicalShiftLeft(IR::Value* value);
     void EmitLogicalShiftRight(IR::Value* value);
+    void EmitArithmeticShiftRight(IR::Value* value);
 
     void EmitReturnToDispatch();
 
diff --git a/src/backend_x64/interface_x64.cpp b/src/backend_x64/interface_x64.cpp
new file mode 100644
index 00000000..887bb2da
--- /dev/null
+++ b/src/backend_x64/interface_x64.cpp
@@ -0,0 +1,102 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <memory>
+
+#include "backend_x64/emit_x64.h"
+#include "backend_x64/jitstate.h"
+#include "backend_x64/routines.h"
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "common/scope_exit.h"
+#include "frontend/arm_types.h"
+#include "interface/interface.h"
+
+namespace Dynarmic {
+
+using namespace BackendX64;
+
+struct BlockOfCode : Gen::XCodeBlock { // XCodeBlock whose constructor allocates its code space up front.
+    BlockOfCode() {
+        AllocCodeSpace(128 * 1024 * 1024); // 134217728 units; presumably bytes (128 MiB) -- confirm against Gen::XCodeBlock.
+    }
+};
+
+struct Jit::Impl { // Private implementation of Jit: holds the guest state, the dispatch routines, the code buffer and the emitter.
+    Impl(UserCallbacks callbacks) : emitter(&block_of_code, &routines, callbacks) {}
+
+    JitState jit_state{};
+    Routines routines{};
+    BlockOfCode block_of_code{};
+    EmitX64 emitter; // Declared last so that routines and block_of_code are constructed before the emitter is handed their addresses.
+
+    size_t Execute(size_t cycle_count) { // Runs the block at the current guest location; returns whatever routines.RunCode returns.
+        u32 pc = jit_state.Reg[15]; // R15 is the ARM program counter.
+        bool TFlag = Common::Bit<5>(jit_state.Cpsr); // CPSR bit 5: T (Thumb state).
+        bool EFlag = Common::Bit<9>(jit_state.Cpsr); // CPSR bit 9: E (data endianness).
+
+        Arm::LocationDescriptor descriptor{pc, TFlag, EFlag};
+
+        CodePtr code_ptr = GetBasicBlock(descriptor);
+        return routines.RunCode(&jit_state, code_ptr, cycle_count);
+    }
+private:
+    CodePtr GetBasicBlock(Arm::LocationDescriptor descriptor) { // Returns the emitter's existing block for descriptor, or emits one.
+        CodePtr code_ptr = emitter.GetBasicBlock(descriptor);
+        if (code_ptr)
+            return code_ptr;
+
+        IR::Block ir_block = IR::Block({0, false, false}); // TODO: Translate the guest code at `descriptor`; for now a placeholder block built from {0, false, false} is emitted.
+        return emitter.Emit(descriptor, ir_block);
+    }
+};
+
+Jit::Jit(UserCallbacks callbacks) : callbacks(callbacks), impl(std::make_unique<Impl>(callbacks)) {} // Stores the callbacks and passes them on to the newly created Impl.
+
+Jit::~Jit() {} // NOTE(review): presumably defined in this file because Impl is only a complete type here -- confirm against interface.h.
+
+size_t Jit::Run(size_t cycle_count) { // Executes until at least cycle_count cycles have been accumulated or a halt is requested; returns the accumulated count.
+    ASSERT(!is_executing); // Run() must not be re-entered.
+    is_executing = true;
+    SCOPE_EXIT({ this->is_executing = false; }); // Presumably runs when this scope is left (see common/scope_exit.h), clearing the flag again.
+
+    halt_requested = false; // Any halt request from before this call is discarded.
+
+    size_t cycles_executed = 0;
+    while (cycles_executed < cycle_count && !halt_requested) {
+        cycles_executed += impl->Execute(cycle_count - cycles_executed); // Execute() is given the remaining budget; its return value is added to the total.
+    }
+
+    return cycles_executed;
+}
+
+void Jit::ClearCache(bool poison_memory) { // Currently a stub: nothing is cleared and poison_memory is not used.
+    ASSERT(!is_executing); // Must not be called while Run() is in progress.
+}
+
+void Jit::HaltExecution() { // Requests that the loop in Run() stops.
+    ASSERT(is_executing); // Only valid while Run() is in progress.
+    halt_requested = true; // Tested by the loop condition in Run() before each Execute() call.
+
+    // TODO: Also update JitState; as written, the request is only observed between Execute() calls.
+}
+
+std::array<u32, 16>& Jit::Regs() { // Mutable reference to the register array stored in the JitState.
+    return impl->jit_state.Reg;
+}
+std::array<u32, 16> Jit::Regs() const { // Const overload: returns a copy of the register array, not a reference.
+    return impl->jit_state.Reg;
+}
+
+u32& Jit::Cpsr() { // Mutable reference to the CPSR value stored in the JitState.
+    return impl->jit_state.Cpsr;
+}
+u32 Jit::Cpsr() const { // Const overload: returns the CPSR by value.
+    return impl->jit_state.Cpsr;
+}
+
+} // namespace Dynarmic
diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h
index 1f6f4d45..adb22e15 100644
--- a/src/backend_x64/jitstate.h
+++ b/src/backend_x64/jitstate.h
@@ -15,15 +15,15 @@ namespace BackendX64 {
 constexpr size_t SpillCount = 32;
 
 struct JitState {
-    u32 Cpsr;
+    u32 Cpsr = 0;
     std::array<u32, 16> Reg{}; // Current register file.
     // TODO: Mode-specific register sets unimplemented.
 
     std::array<u32, SpillCount> Spill{}; // Spill.
 
     // For internal use (See: Routines::RunCode)
-    u64 save_host_RSP;
-    s64 cycles_remaining;
+    u64 save_host_RSP = 0;
+    s64 cycles_remaining = 0;
 };
 
 using CodePtr = const u8*;
diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 6a3f96c2..f4cf9b55 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -14,22 +14,23 @@
 namespace Dynarmic {
 namespace BackendX64 {
 
+// TODO: Just turn this into a function that indexes a std::array.
 const static std::map<HostLoc, Gen::X64Reg> hostloc_to_x64 = {
-        { HostLoc::RAX, Gen::RAX },
-        { HostLoc::RBX, Gen::RBX },
-        { HostLoc::RCX, Gen::RCX },
-        { HostLoc::RDX, Gen::RDX },
-        { HostLoc::RSI, Gen::RSI },
-        { HostLoc::RDI, Gen::RDI },
-        { HostLoc::RBP, Gen::RBP },
-        { HostLoc::RSP, Gen::RSP },
-        { HostLoc::R8,  Gen::R8  },
-        { HostLoc::R9,  Gen::R9  },
-        { HostLoc::R10, Gen::R10 },
-        { HostLoc::R11, Gen::R11 },
-        { HostLoc::R12, Gen::R12 },
-        { HostLoc::R13, Gen::R13 },
-        { HostLoc::R14, Gen::R14 },
+    { HostLoc::RAX, Gen::RAX },
+    { HostLoc::RBX, Gen::RBX },
+    { HostLoc::RCX, Gen::RCX },
+    { HostLoc::RDX, Gen::RDX },
+    { HostLoc::RSI, Gen::RSI },
+    { HostLoc::RDI, Gen::RDI },
+    { HostLoc::RBP, Gen::RBP },
+    { HostLoc::RSP, Gen::RSP },
+    { HostLoc::R8,  Gen::R8  },
+    { HostLoc::R9,  Gen::R9  },
+    { HostLoc::R10, Gen::R10 },
+    { HostLoc::R11, Gen::R11 },
+    { HostLoc::R12, Gen::R12 },
+    { HostLoc::R13, Gen::R13 },
+    { HostLoc::R14, Gen::R14 },
 };
 
 static Gen::OpArg SpillToOpArg(HostLoc loc) {
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index 3b589a7d..967a1fe2 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -11,7 +11,7 @@
 #include "backend_x64/jitstate.h"
 #include "common/common_types.h"
 #include "common/x64/emitter.h"
-#include "frontend_arm/ir/ir.h"
+#include "frontend/ir/ir.h"
 
 namespace Dynarmic {
 namespace BackendX64 {
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
deleted file mode 100644
index 68755060..00000000
--- a/src/common/CMakeLists.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-set(SRCS
-    logging/log.cpp
-    memory_util.cpp
-    string_util.cpp
-    x64/abi.cpp
-    x64/cpu_detect.cpp
-    x64/emitter.cpp
-    )
-
-set(HEADERS
-    assert.h
-    bit_set.h
-    bit_util.h
-    code_block.h
-    common_types.h
-    logging/log.h
-    memory_util.h
-    mp.h
-    scope_exit.h
-    string_util.h
-    x64/abi.h
-    x64/cpu_detect.h
-    x64/emitter.h
-    )
-
-source_group(common FILES ${SRCS} ${HEADERS})
-add_library(dynarmic_common STATIC ${SRCS} ${HEADERS})
-set_target_properties(dynarmic_common PROPERTIES LINKER_LANGUAGE CXX)
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index cbfcaff5..1b0d5bd4 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -32,7 +32,7 @@ constexpr T Bits(const T value) {
 
 /// Extracts a single bit at bit_position from value of type T.
 template<size_t bit_position, typename T>
-constexpr T Bit(const T value) {
+constexpr bool Bit(const T value) {
     static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
 
     return (value >> bit_position) & 1;
@@ -44,8 +44,8 @@ inline T SignExtend(const T value) {
     static_assert(bit_count <= BitSize<T>(), "bit_count larger than bitsize of T");
 
     constexpr T mask = static_cast<T>(1ULL << bit_count) - 1;
-    const T signbit = Bit<bit_count - 1>(value);
-    if (signbit != 0) {
+    const bool signbit = Bit<bit_count - 1, T>(value);
+    if (signbit) {
         return value | ~mask;
     }
     return value;
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 420a6e6a..f9426054 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -34,6 +34,7 @@ enum class Class : ClassType {
     Log,
     Common,
     Common_Memory,
+    Core_ARM11,
     Debug,
     Count ///< Total number of logging classes
 };
diff --git a/src/frontend_arm/arm_types.h b/src/frontend/arm_types.h
similarity index 100%
rename from src/frontend_arm/arm_types.h
rename to src/frontend/arm_types.h
diff --git a/src/frontend_arm/decoder/arm.h b/src/frontend/decoder/arm.h
similarity index 99%
rename from src/frontend_arm/decoder/arm.h
rename to src/frontend/decoder/arm.h
index 241639d6..79088bdb 100644
--- a/src/frontend_arm/decoder/arm.h
+++ b/src/frontend/decoder/arm.h
@@ -16,7 +16,7 @@
 #include <boost/optional.hpp>
 
 #include "common/common_types.h"
-#include "frontend_arm/decoder/decoder_detail.h"
+#include "frontend/decoder/decoder_detail.h"
 
 namespace Dynarmic {
 namespace Arm {
diff --git a/src/frontend_arm/decoder/decoder_detail.h b/src/frontend/decoder/decoder_detail.h
similarity index 100%
rename from src/frontend_arm/decoder/decoder_detail.h
rename to src/frontend/decoder/decoder_detail.h
diff --git a/src/frontend/decoder/thumb1.h b/src/frontend/decoder/thumb1.h
new file mode 100644
index 00000000..ce0d6300
--- /dev/null
+++ b/src/frontend/decoder/thumb1.h
@@ -0,0 +1,165 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <array>
+#include <functional>
+#include <tuple>
+
+#include <boost/optional.hpp>
+
+#include "common/common_types.h"
+#include "frontend/decoder/decoder_detail.h"
+
+namespace Dynarmic {
+namespace Arm {
+
+template <typename Visitor>
+struct Thumb1Matcher {
+    using CallRetT = typename mp::MemFnInfo<decltype(&Visitor::thumb1_UDF), &Visitor::thumb1_UDF>::return_type; // Handler return type, deduced from Visitor::thumb1_UDF.
+
+    Thumb1Matcher(const char* const name, u16 mask, u16 expect, std::function<CallRetT(Visitor&, u16)> fn)
+            : name(name), mask(mask), expect(expect), fn(fn) {}
+
+    /// Gets the name of this type of instruction.
+    const char* GetName() const {
+        return name;
+    }
+
+    /**
+     * Tests to see if the instruction is this type of instruction.
+     * @param instruction The instruction to test
+     * @returns true if the instruction is of this type, i.e. (instruction & mask) == expect
+     */
+    bool Matches(u16 instruction) const {
+        return (instruction & mask) == expect;
+    }
+
+    /**
+     * Calls the corresponding instruction handler on visitor for this type of instruction.
+     * @param v The visitor to use
+     * @param instruction The instruction to decode. Must satisfy Matches(instruction).
+     */
+    CallRetT call(Visitor& v, u16 instruction) const {
+        assert(Matches(instruction));
+        return fn(v, instruction);
+    }
+
+private:
+    const char* name;
+    u16 mask, expect; // An instruction matches when (instruction & mask) == expect.
+    std::function<CallRetT(Visitor&, u16)> fn; // Invoked by call() with the visitor and the raw instruction.
+};
+
+template <typename V>
+static const std::array<Thumb1Matcher<V>, 6> g_thumb1_instruction_table {{ // The size (6) must equal the number of enabled (uncommented) INST entries below.
+
+#define INST(fn, name, bitstring) detail::detail<Thumb1Matcher, u16, 16>::GetMatcher<decltype(fn), fn>(name, bitstring)
+
+    // Shift (immediate), add, subtract, move and compare instructions
+    { INST(&V::thumb1_LSL_imm,        "LSL (imm)",                "00000vvvvvmmmddd") },
+    { INST(&V::thumb1_LSR_imm,        "LSR (imm)",                "00001vvvvvmmmddd") },
+    { INST(&V::thumb1_ASR_imm,        "ASR (imm)",                "00010vvvvvmmmddd") },
+    //{ INST(&V::thumb1_ADD_rrr,        "ADD (rrr)",                "0001100mmmnnnddd") },
+    //{ INST(&V::thumb1_SUB_rrr,        "SUB (rrr)",                "0001101mmmnnnddd") },
+    //{ INST(&V::thumb1_ADD_rri,        "ADD (rri)",                "0001110mmmnnnddd") },
+    //{ INST(&V::thumb1_SUB_rri,        "SUB (rri)",                "0001111mmmnnnddd") },
+    //{ INST(&V::thumb1_MOV_ri,         "MOV (ri)",                 "00100dddvvvvvvvv") },
+    //{ INST(&V::thumb1_CMP_ri,         "CMP (ri)",                 "00101dddvvvvvvvv") },
+    //{ INST(&V::thumb1_ADD_ri,         "ADD (ri)",                 "00110dddvvvvvvvv") },
+    //{ INST(&V::thumb1_SUB_ri,         "SUB (ri)",                 "00111dddvvvvvvvv") },
+
+     // Data-processing instructions
+//    { INST(&V::thumb1_AND_reg,        "AND (reg)",                "0100000000mmmddd") },
+//    { INST(&V::thumb1_EOR_reg,        "EOR (reg)",                "0100000001mmmddd") },
+    { INST(&V::thumb1_LSL_reg,        "LSL (reg)",                "0100000010mmmddd") },
+    { INST(&V::thumb1_LSR_reg,        "LSR (reg)",                "0100000011sssddd") },
+    { INST(&V::thumb1_ASR_reg,        "ASR (reg)",                "0100000100sssddd") },
+    //{ INST(&V::thumb1_ADCS_rr,        "ADCS (rr)",                "0100000101mmmddd") },
+    //{ INST(&V::thumb1_SBCS_rr,        "SBCS (rr)",                "0100000110mmmddd") },
+    //{ INST(&V::thumb1_RORS_rr,        "RORS (rr)",                "0100000111sssddd") },
+    //{ INST(&V::thumb1_TST_rr,         "TST (rr)",                 "0100001000mmmnnn") },
+    //{ INST(&V::thumb1_NEGS_rr,        "NEGS (rr)",                "0100001001mmmddd") },
+    //{ INST(&V::thumb1_CMP_rr,         "CMP (rr)",                 "0100001010mmmnnn") },
+    //{ INST(&V::thumb1_CMN_rr,         "CMN (rr)",                 "0100001011mmmnnn") },
+    //{ INST(&V::thumb1_ORRS_rr,        "ORRS (rr)",                "0100001100mmmddd") },
+    //{ INST(&V::thumb1_MULS_rr,        "MULS (rr)",                "0100001101mmmddd") },
+    //{ INST(&V::thumb1_BICS_rr,        "BICS (rr)",                "0100001110mmmddd") },
+    //{ INST(&V::thumb1_MVNS_rr,        "MVNS (rr)",                "0100001111mmmddd") },
+
+    // Special data instructions
+    //{ INST(&V::thumb1_ADD_high,       "ADD (high)",               "01000100dmmmmddd") }, // v4T, Low regs: v6T2
+    //{ INST(&V::thumb1_CMP_high,       "CMP (high)",               "01000101dmmmmddd") }, // v4T
+    //{ INST(&V::thumb1_MOV_high,       "MOV (high)",               "01000110dmmmmddd") }, // v4T, Low regs: v6
+
+    // Store/Load single data item instructions
+    //{ INST(&V::thumb1_LDR_lit,        "LDR (literal)",            "01001dddvvvvvvvv") },
+    //{ INST(&V::thumb1_STR_rrr,        "STR (rrr)",                "0101000mmmnnnddd") },
+    //{ INST(&V::thumb1_STRH_rrr,       "STRH (rrr)",               "0101001mmmnnnddd") },
+    //{ INST(&V::thumb1_STRB_rrr,       "STRB (rrr)",               "0101010mmmnnnddd") },
+    //{ INST(&V::thumb1_LDRSB_rrr,      "LDRSB (rrr)",              "0101011mmmnnnddd") },
+    //{ INST(&V::thumb1_LDR_rrr,        "LDR (rrr)",                "0101100mmmnnnddd") },
+    //{ INST(&V::thumb1_LDRH_rrr,       "LDRH (rrr)",               "0101101mmmnnnddd") },
+    //{ INST(&V::thumb1_LDRB_rrr,       "LDRB (rrr)",               "0101110mmmnnnddd") },
+    //{ INST(&V::thumb1_LDRSH_rrr,      "LDRSH (rrr)",              "0101111mmmnnnddd") },
+    //{ INST(&V::thumb1_STRH_rri,       "STRH (rri)",               "10000vvvvvnnnddd") },
+    //{ INST(&V::thumb1_LDRH_rri,       "LDRH (rri)",               "10001vvvvvnnnddd") },
+    //{ INST(&V::thumb1_STR_sp,         "STR (SP)",                 "10010dddvvvvvvvv") },
+    //{ INST(&V::thumb1_LDR_sp,         "LDR (SP)",                 "10011dddvvvvvvvv") },
+
+    // Generate relative address instruction
+    //{ INST(&V::thumb1_ADR,            "ADR",                      "10100dddvvvvvvvv") },
+    //{ INST(&V::thumb1_ADD_sp,         "ADD (relative to SP)",     "10101dddvvvvvvvv") },
+
+    // Miscellaneous 16-bit instructions
+    //{ INST(&V::thumb1_ADD_spsp,       "ADD (imm to SP)",          "101100000vvvvvvv") }, // v4T
+    //{ INST(&V::thumb1_SUB_spsp,       "SUB (imm from SP)",        "101100001vvvvvvv") }, // v4T
+    //{ INST(&V::thumb1_SXTH,           "SXTH",                     "1011001000mmmddd") }, // v6
+    //{ INST(&V::thumb1_SXTB,           "SXTB",                     "1011001001mmmddd") }, // v6
+    //{ INST(&V::thumb1_UXTH,           "UXTH",                     "1011001010mmmddd") }, // v6
+    //{ INST(&V::thumb1_UXTB,           "UXTB",                     "1011001011mmmddd") }, // v6
+    //{ INST(&V::thumb1_PUSH,           "PUSH",                     "1011010rxxxxxxxx") }, // v4T
+    //{ INST(&V::thumb1_POP,            "POP",                      "1011110rxxxxxxxx") }, // v4T
+    //{ INST(&V::thumb1_SETEND,         "SETEND",                   "101101100101x000") }, // v6
+    //{ INST(&V::thumb1_CPS,            "CPS",                      "10110110011m0aif") }, // v6
+    //{ INST(&V::thumb1_REV,            "REV",                      "1011101000nnnddd") }, // v6
+    //{ INST(&V::thumb1_REV16,          "REV16",                    "1011101001nnnddd") }, // v6
+    //{ INST(&V::thumb1_REVSH,          "REVSH",                    "1011101011nnnddd") }, // v6
+    //{ INST(&V::thumb1_BKPT,           "BKPT",                     "10111110xxxxxxxx") }, // v5
+
+    // Store/Load multiple registers
+    //{ INST(&V::thumb1_STMIA,          "STMIA",                    "11000nnnxxxxxxxx") },
+    //{ INST(&V::thumb1_LDMIA,          "LDMIA",                    "11001nnnxxxxxxxx") },
+
+    // Branch instructions
+    //{ INST(&V::thumb1_BX,             "BX (reg)",                 "010001110mmmm000") }, // v4T
+    //{ INST(&V::thumb1_BLX,            "BLX (reg)",                "010001111mmmm000") }, // v5T
+    //{ INST(&V::thumb1_UDF,            "UDF",                      "11011110--------") },
+    //{ INST(&V::thumb1_SWI,            "SWI",                      "11011111xxxxxxxx") },
+    //{ INST(&V::thumb1_B_cond,         "B (cond)",                 "1101ccccxxxxxxxx") },
+    //{ INST(&V::thumb1_B_imm,          "B (imm)",                  "11100xxxxxxxxxxx") },
+    //{ INST(&V::thumb1_BLX_suffix,     "BLX (imm, suffix)",        "11101xxxxxxxxxx0") },
+    //{ INST(&V::thumb1_BLX_prefix,     "BL/BLX (imm, prefix)",     "11110xxxxxxxxxxx") },
+    //{ INST(&V::thumb1_BL_suffix,      "BL (imm, suffix)",         "11111xxxxxxxxxxx") },
+
+#undef INST
+
+}};
+
+template<typename Visitor>
+boost::optional<const Thumb1Matcher<Visitor>&> DecodeThumb1(u16 instruction) { // Finds the table entry whose Matches() accepts instruction.
+    const auto& table = g_thumb1_instruction_table<Visitor>;
+    auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); };
+
+    assert(std::count_if(table.begin(), table.end(), matches_instruction) <= 1); // Table entries must not overlap: at most one may match.
+
+    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
+    return iter != table.end() ? boost::make_optional<const Thumb1Matcher<Visitor>&>(*iter) : boost::none; // boost::none when no entry matches.
+}
+
+} // namespace Arm
+} // namespace Dynarmic
diff --git a/src/frontend_arm/arm_disassembler.cpp b/src/frontend/disassembler_arm.cpp
similarity index 99%
rename from src/frontend_arm/arm_disassembler.cpp
rename to src/frontend/disassembler_arm.cpp
index c3ebbfb7..56f54795 100644
--- a/src/frontend_arm/arm_disassembler.cpp
+++ b/src/frontend/disassembler_arm.cpp
@@ -9,8 +9,8 @@
 
 #include "common/bit_util.h"
 #include "common/string_util.h"
-#include "frontend_arm/arm_types.h"
-#include "frontend_arm/decoder/arm.h"
+#include "frontend/arm_types.h"
+#include "frontend/decoder/arm.h"
 
 namespace Dynarmic {
 namespace Arm {
@@ -512,8 +512,6 @@ public:
     std::string arm_RFE() { return "ice"; }
     std::string arm_SETEND(bool E) { return "ice"; }
     std::string arm_SRS() { return "ice"; }
-
-
 };
 
 std::string DisassembleArm(u32 instruction) {
diff --git a/src/frontend_arm/arm_disassembler.h b/src/frontend/disassembler_arm.h
similarity index 100%
rename from src/frontend_arm/arm_disassembler.h
rename to src/frontend/disassembler_arm.h
diff --git a/src/frontend_arm/frontend_arm.h b/src/frontend/frontend_arm.h
similarity index 100%
rename from src/frontend_arm/frontend_arm.h
rename to src/frontend/frontend_arm.h
diff --git a/src/frontend_arm/ir/ir.cpp b/src/frontend/ir/ir.cpp
similarity index 99%
rename from src/frontend_arm/ir/ir.cpp
rename to src/frontend/ir/ir.cpp
index 9a466181..c7510395 100644
--- a/src/frontend_arm/ir/ir.cpp
+++ b/src/frontend/ir/ir.cpp
@@ -8,7 +8,7 @@
 #include <map>
 
 #include "common/assert.h"
-#include "frontend_arm/ir/ir.h"
+#include "frontend/ir/ir.h"
 
 namespace Dynarmic {
 namespace IR {
diff --git a/src/frontend_arm/ir/ir.h b/src/frontend/ir/ir.h
similarity index 98%
rename from src/frontend_arm/ir/ir.h
rename to src/frontend/ir/ir.h
index 67e500e9..acc61197 100644
--- a/src/frontend_arm/ir/ir.h
+++ b/src/frontend/ir/ir.h
@@ -13,8 +13,8 @@
 #include <boost/variant.hpp>
 
 #include "common/common_types.h"
-#include "frontend_arm/arm_types.h"
-#include "frontend_arm/ir/opcodes.h"
+#include "frontend/arm_types.h"
+#include "frontend/ir/opcodes.h"
 
 namespace Dynarmic {
 namespace IR {
diff --git a/src/frontend_arm/ir/opcodes.h b/src/frontend/ir/opcodes.h
similarity index 100%
rename from src/frontend_arm/ir/opcodes.h
rename to src/frontend/ir/opcodes.h
diff --git a/src/frontend_arm/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
similarity index 95%
rename from src/frontend_arm/ir/opcodes.inc
rename to src/frontend/ir/opcodes.inc
index 4a189228..2e9eadba 100644
--- a/src/frontend_arm/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -27,3 +27,4 @@ OPCODE(MostSignificantBit,      T::U1,          T::U32
 OPCODE(IsZero,                  T::U1,          T::U32                                          )
 OPCODE(LogicalShiftLeft,        T::U32,         T::U32,         T::U8,          T::U1           )
 OPCODE(LogicalShiftRight,       T::U32,         T::U32,         T::U8,          T::U1           )
+OPCODE(ArithmeticShiftRight,    T::U32,         T::U32,         T::U8,          T::U1           )
diff --git a/src/frontend_arm/ir_emitter.cpp b/src/frontend/ir_emitter.cpp
similarity index 89%
rename from src/frontend_arm/ir_emitter.cpp
rename to src/frontend/ir_emitter.cpp
index 4f72981b..ecff2566 100644
--- a/src/frontend_arm/ir_emitter.cpp
+++ b/src/frontend/ir_emitter.cpp
@@ -67,6 +67,11 @@ IREmitter::ResultAndCarry IREmitter::LogicalShiftRight(IR::ValuePtr value_in, IR
     return {result, carry_out};
 }
 
+IREmitter::ResultAndCarry IREmitter::ArithmeticShiftRight(IR::ValuePtr value_in, IR::ValuePtr shift_amount, IR::ValuePtr carry_in) {
+    // Emit the shift first, then a GetCarryFromOp instruction that takes the shift as its argument.
+    const auto shifted = Inst(IR::Opcode::ArithmeticShiftRight, {value_in, shift_amount, carry_in});
+    return {shifted, Inst(IR::Opcode::GetCarryFromOp, {shifted})};
+}
 
 IR::ValuePtr IREmitter::Inst(IR::Opcode op, std::initializer_list<IR::ValuePtr> args) {
     auto inst = std::make_shared<IR::Inst>(op);
diff --git a/src/frontend_arm/ir_emitter.h b/src/frontend/ir_emitter.h
similarity index 86%
rename from src/frontend_arm/ir_emitter.h
rename to src/frontend/ir_emitter.h
index 64c87dc6..1c3d2c21 100644
--- a/src/frontend_arm/ir_emitter.h
+++ b/src/frontend/ir_emitter.h
@@ -6,9 +6,9 @@
 
 #pragma once
 
-#include "frontend_arm/arm_types.h"
-#include "frontend_arm/ir/ir.h"
-#include "frontend_arm/ir/opcodes.h"
+#include "frontend/arm_types.h"
+#include "frontend/ir/ir.h"
+#include "frontend/ir/opcodes.h"
 
 namespace Dynarmic {
 namespace Arm {
@@ -40,6 +40,7 @@ public:
 
     ResultAndCarry LogicalShiftLeft(IR::ValuePtr value_in, IR::ValuePtr shift_amount, IR::ValuePtr carry_in);
     ResultAndCarry LogicalShiftRight(IR::ValuePtr value_in, IR::ValuePtr shift_amount, IR::ValuePtr carry_in);
+    ResultAndCarry ArithmeticShiftRight(IR::ValuePtr value_in, IR::ValuePtr shift_amount, IR::ValuePtr carry_in);
 
 private:
     IR::ValuePtr Inst(IR::Opcode op, std::initializer_list<IR::ValuePtr> args);
diff --git a/src/frontend/translate_thumb.h b/src/frontend/translate_thumb.h
new file mode 100644
index 00000000..3e4f7f2e
--- /dev/null
+++ b/src/frontend/translate_thumb.h
@@ -0,0 +1,84 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "frontend/arm_types.h"
+#include "frontend/ir_emitter.h"
+
+namespace Dynarmic {
+namespace Arm {
+
+/**
+ * Visitor whose thumb1_* handlers each translate one Thumb-1 instruction into IR,
+ * emitting through the public `ir` member.
+ */
+class TranslatorVisitor {
+public:
+    IREmitter ir;
+
+    /// LSLS <Rd>, <Rm>, #<imm5>
+    void thumb1_LSL_imm(Imm5 imm5, Reg m, Reg d) {
+        u8 shift_n = imm5;
+        auto cpsr_c = ir.GetCFlag();
+        auto result = ir.LogicalShiftLeft(ir.GetRegister(m), ir.Imm8(shift_n), cpsr_c);
+        WriteShiftResult(d, result.result, result.carry);
+    }
+    /// LSRS <Rd>, <Rm>, #<imm5>
+    void thumb1_LSR_imm(Imm5 imm5, Reg m, Reg d) {
+        u8 shift_n = imm5 != 0 ? imm5 : 32; // an imm5 field of 0 selects a shift by 32
+        auto cpsr_c = ir.GetCFlag();
+        auto result = ir.LogicalShiftRight(ir.GetRegister(m), ir.Imm8(shift_n), cpsr_c);
+        WriteShiftResult(d, result.result, result.carry);
+    }
+    /// ASRS <Rd>, <Rm>, #<imm5>
+    void thumb1_ASR_imm(Imm5 imm5, Reg m, Reg d) {
+        u8 shift_n = imm5 != 0 ? imm5 : 32; // an imm5 field of 0 selects a shift by 32
+        auto cpsr_c = ir.GetCFlag();
+        auto result = ir.ArithmeticShiftRight(ir.GetRegister(m), ir.Imm8(shift_n), cpsr_c);
+        WriteShiftResult(d, result.result, result.carry);
+    }
+    /// LSLS <Rdn>, <Rm>
+    void thumb1_LSL_reg(Reg m, Reg d_n) {
+        const Reg d = d_n, n = d_n;
+        auto shift_n = ir.LeastSignificantByte(ir.GetRegister(m));
+        auto cpsr_c = ir.GetCFlag();
+        auto result = ir.LogicalShiftLeft(ir.GetRegister(n), shift_n, cpsr_c);
+        WriteShiftResult(d, result.result, result.carry);
+    }
+    /// LSRS <Rdn>, <Rm>
+    void thumb1_LSR_reg(Reg m, Reg d_n) {
+        const Reg d = d_n, n = d_n;
+        auto shift_n = ir.LeastSignificantByte(ir.GetRegister(m));
+        auto cpsr_c = ir.GetCFlag();
+        auto result = ir.LogicalShiftRight(ir.GetRegister(n), shift_n, cpsr_c);
+        WriteShiftResult(d, result.result, result.carry);
+    }
+    /// ASRS <Rdn>, <Rm>
+    void thumb1_ASR_reg(Reg m, Reg d_n) {
+        const Reg d = d_n, n = d_n;
+        auto shift_n = ir.LeastSignificantByte(ir.GetRegister(m));
+        auto cpsr_c = ir.GetCFlag();
+        auto result = ir.ArithmeticShiftRight(ir.GetRegister(n), shift_n, cpsr_c);
+        WriteShiftResult(d, result.result, result.carry);
+    }
+
+    /// Emits no IR. NOTE(review): presumably kept so the decoder's matcher type can still
+    /// name &Visitor::thumb1_UDF while the UDF table entry is commented out -- confirm.
+    void thumb1_UDF() {}
+
+private:
+    /// Common tail of the flag-setting shifts: stores `result` in Rd, then sets N, Z and C.
+    void WriteShiftResult(Reg d, IR::ValuePtr result, IR::ValuePtr carry) {
+        ir.SetRegister(d, result);
+        ir.SetNFlag(ir.MostSignificantBit(result));
+        ir.SetZFlag(ir.IsZero(result));
+        ir.SetCFlag(carry);
+    }
+};
+
+} // namespace Arm
+} // namespace Dynarmic
diff --git a/src/frontend_arm/CMakeLists.txt b/src/frontend_arm/CMakeLists.txt
deleted file mode 100644
index 8084337d..00000000
--- a/src/frontend_arm/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-set(SRCS
-    arm_disassembler.cpp
-    ir/ir.cpp
-    ir_emitter.cpp
-    )
-
-set(HEADERS
-    arm_disassembler.h
-    arm_types.h
-    decoder/arm.h
-    decoder/decoder_detail.h
-    decoder/thumb1.h
-    frontend_arm.h
-    ir/ir.h
-    ir/opcodes.h
-    ir_emitter.h
-    translate_thumb.h
-    )
-
-source_group(frontend_arm FILES ${SRCS} ${HEADERS})
-add_library(dynarmic_frontend_arm STATIC ${SRCS} ${HEADERS})
-target_link_libraries(dynarmic_frontend_arm dynarmic_common)
-set_target_properties(dynarmic_frontend_arm PROPERTIES LINKER_LANGUAGE CXX)
diff --git a/src/frontend_arm/decoder/thumb1.h b/src/frontend_arm/decoder/thumb1.h
deleted file mode 100644
index de9733ca..00000000
--- a/src/frontend_arm/decoder/thumb1.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * This software may be used and distributed according to the terms of the GNU
- * General Public License version 2 or any later version.
- */
-
-#pragma once
-
-#include <array>
-#include <functional>
-#include <tuple>
-
-#include <boost/optional.hpp>
-
-#include "common/common_types.h"
-#include "frontend_arm/decoder/decoder_detail.h"
-
-namespace Dynarmic {
-namespace Arm {
-
-template <typename Visitor>
-struct Thumb1Matcher {
-    using CallRetT = typename mp::MemFnInfo<decltype(&Visitor::thumb1_UDF), &Visitor::thumb1_UDF>::return_type;
-
-    Thumb1Matcher(const char* const name, u16 mask, u16 expect, std::function<CallRetT(Visitor&, u16)> fn)
-            : name(name), mask(mask), expect(expect), fn(fn) {}
-
-    /// Gets the name of this type of instruction.
-    const char* GetName() const {
-        return name;
-    }
-
-    /**
-     * Tests to see if the instruction is this type of instruction.
-     * @param instruction The instruction to test
-     * @returns true if the instruction is
-     */
-    bool Matches(u16 instruction) const {
-        return (instruction & mask) == expect;
-    }
-
-    /**
-     * Calls the corresponding instruction handler on visitor for this type of instruction.
-     * @param v The visitor to use
-     * @param instruction The instruction to decode.
-     */
-    CallRetT call(Visitor& v, u16 instruction) const {
-        assert(Matches(instruction));
-        return fn(v, instruction);
-    }
-
-private:
-    const char* name;
-    u16 mask, expect;
-    std::function<CallRetT(Visitor&, u16)> fn;
-};
-
-template <typename V>
-static const std::array<Thumb1Matcher<V>, 2> g_thumb1_instruction_table {{
-
-#define INST(fn, name, bitstring) detail::detail<Thumb1Matcher, u16, 16>::GetMatcher<decltype(fn), fn>(name, bitstring)
-
-    // Shift (immediate), add, subtract, move and compare instructions
-    { INST(&V::thumb1_LSL_imm,        "LSL (imm)",                "00000vvvvvmmmddd") },
-    { INST(&V::thumb1_LSR_imm,        "LSR (imm)",                "00001vvvvvmmmddd") },
-        /*
-    { INST(&V::thumb1_ASR_rri,        "ASR (rri)",                "00010vvvvvmmmddd") },
-    { INST(&V::thumb1_ADD_rrr,        "ADD (rrr)",                "0001100mmmnnnddd") },
-    { INST(&V::thumb1_SUB_rrr,        "SUB (rrr)",                "0001101mmmnnnddd") },
-    { INST(&V::thumb1_ADD_rri,        "ADD (rri)",                "0001110mmmnnnddd") },
-    { INST(&V::thumb1_SUB_rri,        "SUB (rri)",                "0001111mmmnnnddd") },
-    { INST(&V::thumb1_MOV_ri,         "MOV (ri)",                 "00100dddvvvvvvvv") },
-    { INST(&V::thumb1_CMP_ri,         "CMP (ri)",                 "00101dddvvvvvvvv") },
-    { INST(&V::thumb1_ADD_ri,         "ADD (ri)",                 "00110dddvvvvvvvv") },
-    { INST(&V::thumb1_SUB_ri,         "SUB (ri)",                 "00111dddvvvvvvvv") },
-
-     // Data-processing instructions
-    { INST(&V::thumb1_ANDS_rr,        "ANDS (rr)",                "0100000000mmmddd") },
-    { INST(&V::thumb1_EORS_rr,        "EORS (rr)",                "0100000001mmmddd") },
-    { INST(&V::thumb1_LSLS_reg,       "LSLS (reg)",               "0100000010mmmddd") },
-    { INST(&V::thumb1_LSRS_rr,        "LSRS (rr)",                "0100000011sssddd") },
-    { INST(&V::thumb1_ASRS_rr,        "ASRS (rr)",                "0100000100sssddd") },
-    { INST(&V::thumb1_ADCS_rr,        "ADCS (rr)",                "0100000101mmmddd") },
-    { INST(&V::thumb1_SBCS_rr,        "SBCS (rr)",                "0100000110mmmddd") },
-    { INST(&V::thumb1_RORS_rr,        "RORS (rr)",                "0100000111sssddd") },
-    { INST(&V::thumb1_TST_rr,         "TST (rr)",                 "0100001000mmmnnn") },
-    { INST(&V::thumb1_NEGS_rr,        "NEGS (rr)",                "0100001001mmmddd") },
-    { INST(&V::thumb1_CMP_rr,         "CMP (rr)",                 "0100001010mmmnnn") },
-    { INST(&V::thumb1_CMN_rr,         "CMN (rr)",                 "0100001011mmmnnn") },
-    { INST(&V::thumb1_ORRS_rr,        "ORRS (rr)",                "0100001100mmmddd") },
-    { INST(&V::thumb1_MULS_rr,        "MULS (rr)",                "0100001101mmmddd") },
-    { INST(&V::thumb1_BICS_rr,        "BICS (rr)",                "0100001110mmmddd") },
-    { INST(&V::thumb1_MVNS_rr,        "MVNS (rr)",                "0100001111mmmddd") },
-
-    // Special data instructions
-    { INST(&V::thumb1_ADD_high,       "ADD (high)",               "01000100dmmmmddd") }, // v4T, Low regs: v6T2
-    { INST(&V::thumb1_CMP_high,       "CMP (high)",               "01000101dmmmmddd") }, // v4T
-    { INST(&V::thumb1_MOV_high,       "MOV (high)",               "01000110dmmmmddd") }, // v4T, Low regs: v6
-
-    // Store/Load single data item instructions
-    { INST(&V::thumb1_LDR_lit,        "LDR (literal)",            "01001dddvvvvvvvv") },
-    { INST(&V::thumb1_STR_rrr,        "STR (rrr)",                "0101000mmmnnnddd") },
-    { INST(&V::thumb1_STRH_rrr,       "STRH (rrr)",               "0101001mmmnnnddd") },
-    { INST(&V::thumb1_STRB_rrr,       "STRB (rrr)",               "0101010mmmnnnddd") },
-    { INST(&V::thumb1_LDRSB_rrr,      "LDRSB (rrr)",              "0101011mmmnnnddd") },
-    { INST(&V::thumb1_LDR_rrr,        "LDR (rrr)",                "0101100mmmnnnddd") },
-    { INST(&V::thumb1_LDRH_rrr,       "LDRH (rrr)",               "0101101mmmnnnddd") },
-    { INST(&V::thumb1_LDRB_rrr,       "LDRB (rrr)",               "0101110mmmnnnddd") },
-    { INST(&V::thumb1_LDRSH_rrr,      "LDRSH (rrr)",              "0101111mmmnnnddd") },
-    { INST(&V::thumb1_STRH_rri,       "STRH (rri)",               "10000vvvvvnnnddd") },
-    { INST(&V::thumb1_LDRH_rri,       "LDRH (rri)",               "10001vvvvvnnnddd") },
-    { INST(&V::thumb1_STR_sp,         "STR (SP)",                 "10010dddvvvvvvvv") },
-    { INST(&V::thumb1_LDR_sp,         "LDR (SP)",                 "10011dddvvvvvvvv") },
-
-    // Generate relative address instruction
-    { INST(&V::thumb1_ADR,            "ADR",                      "10100dddvvvvvvvv") },
-    { INST(&V::thumb1_ADD_sp,         "ADD (relative to SP)",     "10101dddvvvvvvvv") },
-
-    // Miscellaneous 16-bit instructions
-    { INST(&V::thumb1_ADD_spsp,       "ADD (imm to SP)",          "101100000vvvvvvv") }, // v4T
-    { INST(&V::thumb1_SUB_spsp,       "SUB (imm from SP)",        "101100001vvvvvvv") }, // v4T
-    { INST(&V::thumb1_SXTH,           "SXTH",                     "1011001000mmmddd") }, // v6
-    { INST(&V::thumb1_SXTB,           "SXTB",                     "1011001001mmmddd") }, // v6
-    { INST(&V::thumb1_UXTH,           "UXTH",                     "1011001010mmmddd") }, // v6
-    { INST(&V::thumb1_UXTB,           "UXTB",                     "1011001011mmmddd") }, // v6
-    { INST(&V::thumb1_PUSH,           "PUSH",                     "1011010rxxxxxxxx") }, // v4T
-    { INST(&V::thumb1_POP,            "POP",                      "1011110rxxxxxxxx") }, // v4T
-    { INST(&V::thumb1_SETEND,         "SETEND",                   "101101100101x000") }, // v6
-    { INST(&V::thumb1_CPS,            "CPS",                      "10110110011m0aif") }, // v6
-    { INST(&V::thumb1_REV,            "REV",                      "1011101000nnnddd") }, // v6
-    { INST(&V::thumb1_REV16,          "REV16",                    "1011101001nnnddd") }, // v6
-    { INST(&V::thumb1_REVSH,          "REVSH",                    "1011101011nnnddd") }, // v6
-    { INST(&V::thumb1_BKPT,           "BKPT",                     "10111110xxxxxxxx") }, // v5
-
-    // Store/Load multiple registers
-    { INST(&V::thumb1_STMIA,          "STMIA",                    "11000nnnxxxxxxxx") },
-    { INST(&V::thumb1_LDMIA,          "LDMIA",                    "11001nnnxxxxxxxx") },
-
-    // Branch instructions
-    { INST(&V::thumb1_BX,             "BX (reg)",                 "010001110mmmm000") }, // v4T
-    { INST(&V::thumb1_BLX,            "BLX (reg)",                "010001111mmmm000") }, // v5T
-    { INST(&V::thumb1_UDF,            "UDF",                      "11011110--------") },
-    { INST(&V::thumb1_SWI,            "SWI",                      "11011111xxxxxxxx") },
-    { INST(&V::thumb1_B_cond,         "B (cond)",                 "1101ccccxxxxxxxx") },
-    { INST(&V::thumb1_B_imm,          "B (imm)",                  "11100xxxxxxxxxxx") },
-    { INST(&V::thumb1_BLX_suffix,     "BLX (imm, suffix)",        "11101xxxxxxxxxx0") },
-    { INST(&V::thumb1_BLX_prefix,     "BL/BLX (imm, prefix)",     "11110xxxxxxxxxxx") },
-    { INST(&V::thumb1_BL_suffix,      "BL (imm, suffix)",         "11111xxxxxxxxxxx") },*/
-
-#undef INST
-
-}};
-
-template<typename Visitor>
-boost::optional<const Thumb1Matcher<Visitor>&> DecodeThumb1(u16 instruction) {
-    const auto& table = g_thumb1_instruction_table<Visitor>;
-    auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); };
-
-    assert(std::count_if(table.begin(), table.end(), matches_instruction) <= 1);
-
-    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
-    return iter != table.end() ? boost::make_optional<const Thumb1Matcher<Visitor>&>(*iter) : boost::none;
-}
-
-} // namespace Arm
-} // namespace Dynarmic
diff --git a/src/frontend_arm/translate_thumb.h b/src/frontend_arm/translate_thumb.h
deleted file mode 100644
index 0a30f2a3..00000000
--- a/src/frontend_arm/translate_thumb.h
+++ /dev/null
@@ -1,293 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * This software may be used and distributed according to the terms of the GNU
- * General Public License version 2 or any later version.
- */
-
-#pragma once
-
-#include "frontend_arm/arm_types.h"
-#include "frontend_arm/ir_emitter.h"
-
-namespace Dynarmic {
-namespace Arm {
-
-class TranslatorVisitor {
-public:
-    IREmitter ir;
-
-    void thumb1_LSL_imm(Imm5 imm5, Reg m, Reg d) {
-        u8 shift_n = imm5;
-        // LSLS <Rd>, <Rm>, #<imm5>
-        auto cpsr_c = ir.GetCFlag();
-        auto result = ir.LogicalShiftLeft(ir.GetRegister(m), ir.Imm8(shift_n), cpsr_c);
-        ir.SetRegister(d, result.result);
-        ir.SetNFlag(ir.MostSignificantBit(result.result));
-        ir.SetZFlag(ir.IsZero(result.result));
-        ir.SetCFlag(result.carry);
-    }
-    void thumb1_LSR_imm(Imm5 imm5, Reg m, Reg d) {
-        u8 shift_n = imm5 != 0 ? imm5 : 32;
-        // LSRS <Rd>, <Rm>, #<imm5>
-        auto cpsr_c = ir.GetCFlag();
-        auto result = ir.LogicalShiftRight(ir.GetRegister(m), ir.Imm8(shift_n), cpsr_c);
-        ir.SetRegister(d, result.result);
-        ir.SetNFlag(ir.MostSignificantBit(result.result));
-        ir.SetZFlag(ir.IsZero(result.result));
-        ir.SetCFlag(result.carry);
-    }
-    void thumb1_ASR_rri(Imm5 imm5, Reg m, Reg d) {
-        ir.Unimplemented();
-    }
-    void thumb1_ADD_rrr(Reg m, Reg n, Reg d) {
-        ir.Unimplemented();
-    }
-    void thumb1_SUB_rrr(Reg m, Reg n, Reg d) {
-        ir.Unimplemented();
-    }
-    void thumb1_ADD_rri() {
-        ir.Unimplemented();
-    }
-    void thumb1_SUB_rri() {
-        ir.Unimplemented();
-    }
-    void thumb1_MOV_ri() {
-        ir.Unimplemented();
-    }
-    void thumb1_CMP_ri() {
-        ir.Unimplemented();
-    }
-    void thumb1_ADD_ri() {
-        ir.Unimplemented();
-    }
-    void thumb1_SUB_ri() {
-        ir.Unimplemented();
-    }
-    void thumb1_ANDS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_EORS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_LSLS_reg(Reg m, Reg d_n) {
-        const Reg d = d_n, n = d_n;
-        // LSLS <Rdn>, <Rm>
-        auto shift_n = ir.LeastSignificantByte(ir.GetRegister(m));
-        auto apsr_c = ir.GetCFlag();
-        auto result_carry = ir.LogicalShiftLeft(ir.GetRegister(d), shift_n, apsr_c);
-        ir.SetRegister(d, result_carry.result);
-        ir.SetNFlag(ir.MostSignificantBit(result_carry.result));
-        ir.SetZFlag(ir.IsZero(result_carry.result));
-        ir.SetCFlag(result_carry.carry);
-    }
-    void thumb1_LSRS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_ASRS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_ADCS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_SBCS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_RORS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_TST_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_NEGS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_CMP_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_CMN_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_ORRS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_MULS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_BICS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_MVNS_rr() {
-        ir.Unimplemented();
-    }
-    void thumb1_ADD_high() {
-        ir.Unimplemented();
-    }
-    void thumb1_CMP_high() {
-        ir.Unimplemented();
-    }
-    void thumb1_MOV_high() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDR_lit() {
-        ir.Unimplemented();
-    }
-    void thumb1_STR_rrr() {
-        ir.Unimplemented();
-    }
-    void thumb1_STRH_rrr() {
-        ir.Unimplemented();
-    }
-    void thumb1_STRB_rrr() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDRSB_rrr() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDR_rrr() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDRH_rrr() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDRB_rrr() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDRSH_rrr() {
-        ir.Unimplemented();
-    }
-    void thumb1_STRH_rri() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDRH_rri() {
-        ir.Unimplemented();
-    }
-    void thumb1_STR_sp() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDR_sp() {
-        ir.Unimplemented();
-    }
-    void thumb1_ADR() {
-        ir.Unimplemented();
-    }
-    void thumb1_ADD_sp() {
-        ir.Unimplemented();
-    }
-    void thumb1_ADD_spsp() {
-        ir.Unimplemented();
-    }
-    void thumb1_SUB_spsp() {
-        ir.Unimplemented();
-    }
-    void thumb1_SXTH() {
-        ir.Unimplemented();
-    }
-    void thumb1_SXTB() {
-        ir.Unimplemented();
-    }
-    void thumb1_UXTH() {
-        ir.Unimplemented();
-    }
-    void thumb1_UXTB() {
-        ir.Unimplemented();
-    }
-    void thumb1_PUSH() {
-        ir.Unimplemented();
-    }
-    void thumb1_POP() {
-        ir.Unimplemented();
-    }
-    void thumb1_SETEND() {
-        ir.Unimplemented();
-    }
-    void thumb1_CPS() {
-        ir.Unimplemented();
-    }
-    void thumb1_REV() {
-        ir.Unimplemented();
-    }
-    void thumb1_REV16() {
-        ir.Unimplemented();
-    }
-    void thumb1_REVSH() {
-        ir.Unimplemented();
-    }
-    void thumb1_BKPT() {
-        ir.Unimplemented();
-    }
-    void thumb1_STMIA() {
-        ir.Unimplemented();
-    }
-    void thumb1_LDMIA() {
-        ir.Unimplemented();
-    }
-    void thumb1_BX() {
-        ir.Unimplemented();
-    }
-    void thumb1_BLX() {
-        ir.Unimplemented();
-    }
-    void thumb1_BEQ() {
-        ir.Unimplemented();
-    }
-    void thumb1_BNE() {
-        ir.Unimplemented();
-    }
-    void thumb1_BCS() {
-        ir.Unimplemented();
-    }
-    void thumb1_BCC() {
-        ir.Unimplemented();
-    }
-    void thumb1_BMI() {
-        ir.Unimplemented();
-    }
-    void thumb1_BPL() {
-        ir.Unimplemented();
-    }
-    void thumb1_BVS() {
-        ir.Unimplemented();
-    }
-    void thumb1_BVC() {
-        ir.Unimplemented();
-    }
-    void thumb1_BHI() {
-        ir.Unimplemented();
-    }
-    void thumb1_BLS() {
-        ir.Unimplemented();
-    }
-    void thumb1_BGE() {
-        ir.Unimplemented();
-    }
-    void thumb1_BLT() {
-        ir.Unimplemented();
-    }
-    void thumb1_BGT() {
-        ir.Unimplemented();
-    }
-    void thumb1_BLE() {
-        ir.Unimplemented();
-    }
-    void thumb1_UDF() {
-        ir.Unimplemented();
-    }
-    void thumb1_SWI() {
-        ir.Unimplemented();
-    }
-    void thumb1_B() {
-        ir.Unimplemented();
-    }
-    void thumb1_BLX_suffix() {
-        ir.Unimplemented();
-    }
-    void thumb1_BLX_prefix() {
-        ir.Unimplemented();
-    }
-    void thumb1_BL_suffix() {
-        ir.Unimplemented();
-    }
-};
-
-} // namespace Arm
-} // namepsace Dynarmic
diff --git a/src/interface/interface.h b/src/interface/interface.h
index 3402ba51..7319c334 100644
--- a/src/interface/interface.h
+++ b/src/interface/interface.h
@@ -6,10 +6,17 @@
 
 #pragma once
 
+#include <array>
+#include <cstddef>
+#include <memory>
+
 #include "common/common_types.h"
 
 namespace Dynarmic {
 
+class Jit;
+
+/// These function pointers may be inserted into compiled code.
 struct UserCallbacks {
     u8 (*MemoryRead8)(u32 vaddr);
     u16 (*MemoryRead16)(u32 vaddr);
@@ -21,9 +28,63 @@ struct UserCallbacks {
     void (*MemoryWrite32)(u32 vaddr, u32 value);
     void (*MemoryWrite64)(u32 vaddr, u64 value);
 
-    void (*InterpreterFallback)(u32 pc, void* jit_state);
+    bool (*IsReadOnlyMemory)(u32 vaddr);
 
-    bool (*SoftwareInterrupt)(u32 swi);
+    void (*InterpreterFallback)(u32 pc, Jit* jit);
+
+    bool (*CallSVC)(u32 swi);
+};
+
+/// Public handle to the JIT; everything beyond the state below is hidden behind Impl.
+class Jit final {
+public:
+    explicit Jit(Dynarmic::UserCallbacks callbacks);
+    ~Jit();
+
+    /**
+     * Runs the emulated CPU for about cycle_count cycles.
+     * Cannot be recursively called.
+     * @param cycle_count Estimated number of cycles to run the CPU for.
+     * @returns Actual cycle count.
+     */
+    size_t Run(size_t cycle_count);
+
+    /**
+     * Clears the code cache of all compiled code.
+     * Cannot be called from a callback.
+     * @param poison_memory If true, poisons memory to crash if any stray code pointers are called.
+     */
+    void ClearCache(bool poison_memory = true);
+
+    /**
+     * Stops execution in Jit::Run.
+     * Can only be called from a callback.
+     */
+    void HaltExecution();
+
+    /// View and modify registers (the const overload returns a copy).
+    std::array<u32, 16>& Regs();
+    std::array<u32, 16> Regs() const;
+
+    /// View and modify CPSR.
+    u32& Cpsr();
+    u32 Cpsr() const;
+
+    /**
+     * Returns true if Jit::Run was called but hasn't returned yet.
+     * i.e.: We're in a callback.
+     */
+    bool IsExecuting() const {
+        return is_executing;
+    }
+
+private:
+    bool halt_requested = false;
+    bool is_executing = false;
+    Dynarmic::UserCallbacks callbacks;
+
+    struct Impl;
+    std::unique_ptr<Impl> impl;
 };
 
 } // namespace Dynarmic
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
deleted file mode 100644
index 824354e3..00000000
--- a/src/tests/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-set(SRCS
-    arm/fuzz_thumb.cpp
-    arm/test_arm_disassembler.cpp
-    arm/test_thumb_instructions.cpp
-    main.cpp
-    )
-
-set(HEADERS
-    )
-
-source_group(tests FILES ${SRCS} ${HEADERS})
-add_executable(dynarmic_tests ${SRCS})
-target_link_libraries(dynarmic_tests dynarmic_common dynarmic_frontend_arm dynarmic_backend_x64)
-set_target_properties(dynarmic_tests PROPERTIES LINKER_LANGUAGE CXX)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 00000000..27431d3c
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,35 @@
+include_directories(.)  # add this directory (tests/) to the include search path
+
+set(SRCS
+    arm/fuzz_thumb.cpp
+    arm/test_arm_disassembler.cpp
+    arm/test_thumb_instructions.cpp
+    main.cpp
+    skyeye_interpreter/dyncom/arm_dyncom_dec.cpp  # imported SkyEye interpreter sources from here on
+    skyeye_interpreter/dyncom/arm_dyncom_interpreter.cpp
+    skyeye_interpreter/dyncom/arm_dyncom_thumb.cpp
+    skyeye_interpreter/skyeye_common/armstate.cpp
+    skyeye_interpreter/skyeye_common/armsupp.cpp
+    skyeye_interpreter/skyeye_common/vfp/vfp.cpp
+    skyeye_interpreter/skyeye_common/vfp/vfpdouble.cpp
+    skyeye_interpreter/skyeye_common/vfp/vfpinstr.cpp
+    skyeye_interpreter/skyeye_common/vfp/vfpsingle.cpp
+    )
+
+set(HEADERS
+    skyeye_interpreter/dyncom/arm_dyncom_dec.h
+    skyeye_interpreter/dyncom/arm_dyncom_interpreter.h
+    skyeye_interpreter/dyncom/arm_dyncom_run.h
+    skyeye_interpreter/dyncom/arm_dyncom_thumb.h
+    skyeye_interpreter/skyeye_common/armstate.h
+    skyeye_interpreter/skyeye_common/armsupp.h
+    skyeye_interpreter/skyeye_common/arm_regformat.h
+    skyeye_interpreter/skyeye_common/vfp/asm_vfp.h
+    skyeye_interpreter/skyeye_common/vfp/vfp.h
+    skyeye_interpreter/skyeye_common/vfp/vfp_helper.h
+    )
+
+source_group(dynarmic_tests FILES ${SRCS} ${HEADERS})  # IDE file grouping only
+add_executable(dynarmic_tests ${SRCS})  # NOTE(review): ${HEADERS} is not passed here -- intentional?
+target_link_libraries(dynarmic_tests dynarmic)  # links against the `dynarmic` target
+set_target_properties(dynarmic_tests PROPERTIES LINKER_LANGUAGE CXX)  # force the C++ linker
diff --git a/src/tests/arm/fuzz_thumb.cpp b/tests/arm/fuzz_thumb.cpp
similarity index 86%
rename from src/tests/arm/fuzz_thumb.cpp
rename to tests/arm/fuzz_thumb.cpp
index 3197d735..d4252f9e 100644
--- a/src/tests/arm/fuzz_thumb.cpp
+++ b/tests/arm/fuzz_thumb.cpp
@@ -4,3 +4,5 @@
  * General Public License version 2 or any later version.
  */
 
+#include "interface/interface.h"
+
diff --git a/src/tests/arm/test_arm_disassembler.cpp b/tests/arm/test_arm_disassembler.cpp
similarity index 96%
rename from src/tests/arm/test_arm_disassembler.cpp
rename to tests/arm/test_arm_disassembler.cpp
index c287c423..97aa9783 100644
--- a/src/tests/arm/test_arm_disassembler.cpp
+++ b/tests/arm/test_arm_disassembler.cpp
@@ -6,7 +6,7 @@
 
 #include <catch.hpp>
 
-#include "frontend_arm/arm_disassembler.h"
+#include "frontend/disassembler_arm.h"
 
 TEST_CASE( "Disassemble branch instructions", "[arm][disassembler]" ) {
     REQUIRE(Dynarmic::Arm::DisassembleArm(0xEAFFFFFE) == "b +#0");
diff --git a/src/tests/arm/test_thumb_instructions.cpp b/tests/arm/test_thumb_instructions.cpp
similarity index 88%
rename from src/tests/arm/test_thumb_instructions.cpp
rename to tests/arm/test_thumb_instructions.cpp
index fc92982d..6fc2413a 100644
--- a/src/tests/arm/test_thumb_instructions.cpp
+++ b/tests/arm/test_thumb_instructions.cpp
@@ -8,8 +8,8 @@
 
 #include "backend_x64/emit_x64.h"
 #include "common/common_types.h"
-#include "frontend_arm/decoder/thumb1.h"
-#include "frontend_arm/translate_thumb.h"
+#include "frontend/decoder/thumb1.h"
+#include "frontend/translate_thumb.h"
 
 struct TinyBlockOfCode : Gen::XCodeBlock {
     TinyBlockOfCode() {
@@ -25,10 +25,10 @@ void RunSingleThumbInstruction(u16 thumb_instruction, Dynarmic::BackendX64::JitS
 
     TinyBlockOfCode block_of_code;
     Dynarmic::BackendX64::Routines routines;
-    Dynarmic::UserCallbacks callbacks;
+    Dynarmic::UserCallbacks callbacks{};
     Dynarmic::BackendX64::EmitX64 emitter(&block_of_code, &routines, callbacks);
 
-    Dynarmic::BackendX64::CodePtr code = emitter.Emit(visitor.ir.block);
+    Dynarmic::BackendX64::CodePtr code = emitter.Emit({0, true, false}, visitor.ir.block);
     routines.RunCode(jit_state_ptr, code, 1);
 }
 
diff --git a/src/tests/main.cpp b/tests/main.cpp
similarity index 100%
rename from src/tests/main.cpp
rename to tests/main.cpp
diff --git a/tests/skyeye_interpreter/dyncom/arm_dyncom_dec.cpp b/tests/skyeye_interpreter/dyncom/arm_dyncom_dec.cpp
new file mode 100644
index 00000000..d161e76e
--- /dev/null
+++ b/tests/skyeye_interpreter/dyncom/arm_dyncom_dec.cpp
@@ -0,0 +1,466 @@
+// Copyright 2012 Michael Kang, 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "tests/skyeye_interpreter/dyncom/arm_dyncom_dec.h"
+#include "tests/skyeye_interpreter/skyeye_common/armsupp.h"
+
+const InstructionSetEncodingItem arm_instruction[] = { // rows: { name, number of triples, version, { first bit, last bit, expected value, ... } }; DecodeARMInstruction scans top to bottom and returns the first row that matches
+    { "vmla", 5, ARMVFP2,      { 23, 27, 0x1C, 20, 21, 0x0, 9, 11, 0x5, 6, 6, 0, 4, 4, 0 }},
+    { "vmls", 5, ARMVFP2,      { 23, 27, 0x1C, 20, 21, 0x0, 9, 11, 0x5, 6, 6, 1, 4, 4, 0 }},
+    { "vnmla", 5, ARMVFP2,     { 23, 27, 0x1C, 20, 21, 0x1, 9, 11, 0x5, 6, 6, 1, 4, 4, 0 }},
+    { "vnmls", 5, ARMVFP2,     { 23, 27, 0x1C, 20, 21, 0x1, 9, 11, 0x5, 6, 6, 0, 4, 4, 0 }},
+    { "vnmul", 5, ARMVFP2,     { 23, 27, 0x1C, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 1, 4, 4, 0 }},
+    { "vmul", 5, ARMVFP2,      { 23, 27, 0x1C, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 0, 4, 4, 0 }},
+    { "vadd", 5, ARMVFP2,      { 23, 27, 0x1C, 20, 21, 0x3, 9, 11, 0x5, 6, 6, 0, 4, 4, 0 }},
+    { "vsub", 5, ARMVFP2,      { 23, 27, 0x1C, 20, 21, 0x3, 9, 11, 0x5, 6, 6, 1, 4, 4, 0 }},
+    { "vdiv", 5, ARMVFP2,      { 23, 27, 0x1D, 20, 21, 0x0, 9, 11, 0x5, 6, 6, 0, 4, 4, 0 }},
+    { "vmov(i)", 4, ARMVFP3,   { 23, 27, 0x1D, 20, 21, 0x3, 9, 11, 0x5, 4, 7, 0 }},
+    { "vmov(r)", 5, ARMVFP3,   { 23, 27, 0x1D, 16, 21, 0x30, 9, 11, 0x5, 6, 7, 1, 4, 4, 0 }},
+    { "vabs", 5, ARMVFP2,      { 23, 27, 0x1D, 16, 21, 0x30, 9, 11, 0x5, 6, 7, 3, 4, 4, 0 }},
+    { "vneg", 5, ARMVFP2,      { 23, 27, 0x1D, 17, 21, 0x18, 9, 11, 0x5, 6, 7, 1, 4, 4, 0 }},
+    { "vsqrt", 5, ARMVFP2,     { 23, 27, 0x1D, 16, 21, 0x31, 9, 11, 0x5, 6, 7, 3, 4, 4, 0 }},
+    { "vcmp", 5, ARMVFP2,      { 23, 27, 0x1D, 16, 21, 0x34, 9, 11, 0x5, 6, 6, 1, 4, 4, 0 }},
+    { "vcmp2", 5, ARMVFP2,     { 23, 27, 0x1D, 16, 21, 0x35, 9, 11, 0x5, 0, 6, 0x40 }},
+    { "vcvt(bds)", 5, ARMVFP2, { 23, 27, 0x1D, 16, 21, 0x37, 9, 11, 0x5, 6, 7, 3, 4, 4, 0 }},
+    { "vcvt(bff)", 6, ARMVFP3, { 23, 27, 0x1D, 19, 21, 0x7, 17, 17, 0x1, 9, 11, 5, 6, 6, 1 }},
+    { "vcvt(bfi)", 5, ARMVFP2, { 23, 27, 0x1D, 19, 21, 0x7, 9, 11, 0x5, 6, 6, 1, 4, 4, 0 }},
+    { "vmovbrs", 3, ARMVFP2,   { 21, 27, 0x70, 8, 11, 0xA, 0, 6, 0x10 }},
+    { "vmsr", 2, ARMVFP2,      { 20, 27, 0xEE, 0, 11, 0xA10 }},
+    { "vmovbrc", 4, ARMVFP2,   { 23, 27, 0x1C, 20, 20, 0x0, 8, 11, 0xB, 0, 4, 0x10 }},
+    { "vmrs", 2, ARMVFP2,      { 20, 27, 0xEF, 0, 11, 0xA10 }},
+    { "vmovbcr", 4, ARMVFP2,   { 24, 27, 0xE, 20, 20, 1, 8, 11, 0xB, 0, 4, 0x10 }},
+    { "vmovbrrss", 3, ARMVFP2, { 21, 27, 0x62, 8, 11, 0xA, 4, 4, 1 }},
+    { "vmovbrrd", 3, ARMVFP2,  { 21, 27, 0x62, 6, 11, 0x2C, 4, 4, 1 }},
+    { "vstr", 3, ARMVFP2,      { 24, 27, 0xD, 20, 21, 0, 9, 11, 5 }},
+    { "vpush", 3, ARMVFP2,     { 23, 27, 0x1A, 16, 21, 0x2D, 9, 11, 5 }},
+    { "vstm", 3, ARMVFP2,      { 25, 27, 0x6, 20, 20, 0, 9, 11, 5 }},
+    { "vpop", 3, ARMVFP2,      { 23, 27, 0x19, 16, 21, 0x3D, 9, 11, 5 }},
+    { "vldr", 3, ARMVFP2,      { 24, 27, 0xD, 20, 21, 1, 9, 11, 5 }},
+    { "vldm", 3, ARMVFP2,      { 25, 27, 0x6, 20, 20, 1, 9, 11, 5 }},
+
+    { "srs", 4, 6,         { 25, 31, 0x0000007c, 22, 22, 0x00000001, 16, 20, 0x0000000d, 8, 11, 0x00000005 }},
+    { "rfe", 4, 6,         { 25, 31, 0x0000007c, 22, 22, 0x00000000, 20, 20, 0x00000001, 8, 11, 0x0000000a }},
+    { "bkpt", 2, 3,        { 20, 27, 0x00000012, 4, 7, 0x00000007 }},
+    { "blx", 1, 3,         { 25, 31, 0x0000007d }},
+    { "cps", 3, 6,         { 20, 31, 0x00000f10, 16, 16, 0x00000000, 5, 5, 0x00000000 }},
+    { "pld", 4, 4,         { 26, 31, 0x0000003d, 24, 24, 0x00000001, 20, 22, 0x00000005, 12, 15, 0x0000000f }},
+    { "setend", 2, 6,      { 16, 31, 0x0000f101, 4, 7, 0x00000000 }},
+    { "clrex", 1, 6,       { 0, 31, 0xf57ff01f }},
+    { "rev16", 2, 6,       { 16, 27, 0x000006bf, 4, 11, 0x000000fb }},
+    { "usad8", 3, 6,       { 20, 27, 0x00000078, 12, 15, 0x0000000f, 4, 7, 0x00000001 }},
+    { "sxtb", 2, 6,        { 16, 27, 0x000006af, 4, 7, 0x00000007 }},
+    { "uxtb", 2, 6,        { 16, 27, 0x000006ef, 4, 7, 0x00000007 }},
+    { "sxth", 2, 6,        { 16, 27, 0x000006bf, 4, 7, 0x00000007 }},
+    { "sxtb16", 2, 6,      { 16, 27, 0x0000068f, 4, 7, 0x00000007 }},
+    { "uxth", 2, 6,        { 16, 27, 0x000006ff, 4, 7, 0x00000007 }},
+    { "uxtb16", 2, 6,      { 16, 27, 0x000006cf, 4, 7, 0x00000007 }},
+    { "cpy", 2, 6,         { 20, 27, 0x0000001a, 4, 11, 0x00000000 }},
+    { "uxtab", 2, 6,       { 20, 27, 0x0000006e, 4, 9, 0x00000007 }},
+    { "ssub8", 2, 6,       { 20, 27, 0x00000061, 4, 7, 0x0000000f }},
+    { "shsub8", 2, 6,      { 20, 27, 0x00000063, 4, 7, 0x0000000f }},
+    { "ssubaddx", 2, 6,    { 20, 27, 0x00000061, 4, 7, 0x00000005 }},
+    { "strex", 2, 6,       { 20, 27, 0x00000018, 4, 7, 0x00000009 }},
+    { "strexb", 2, 7,      { 20, 27, 0x0000001c, 4, 7, 0x00000009 }},
+    { "swp", 2, 0,         { 20, 27, 0x00000010, 4, 7, 0x00000009 }},
+    { "swpb", 2, 0,        { 20, 27, 0x00000014, 4, 7, 0x00000009 }},
+    { "ssub16", 2, 6,      { 20, 27, 0x00000061, 4, 7, 0x00000007 }},
+    { "ssat16", 2, 6,      { 20, 27, 0x0000006a, 4, 7, 0x00000003 }},
+    { "shsubaddx", 2, 6,   { 20, 27, 0x00000063, 4, 7, 0x00000005 }},
+    { "qsubaddx", 2, 6,    { 20, 27, 0x00000062, 4, 7, 0x00000005 }},
+    { "shaddsubx", 2, 6,   { 20, 27, 0x00000063, 4, 7, 0x00000003 }},
+    { "shadd8", 2, 6,      { 20, 27, 0x00000063, 4, 7, 0x00000009 }},
+    { "shadd16", 2, 6,     { 20, 27, 0x00000063, 4, 7, 0x00000001 }},
+    { "sel", 2, 6,         { 20, 27, 0x00000068, 4, 7, 0x0000000b }},
+    { "saddsubx", 2, 6,    { 20, 27, 0x00000061, 4, 7, 0x00000003 }},
+    { "sadd8", 2, 6,       { 20, 27, 0x00000061, 4, 7, 0x00000009 }},
+    { "sadd16", 2, 6,      { 20, 27, 0x00000061, 4, 7, 0x00000001 }},
+    { "shsub16", 2, 6,     { 20, 27, 0x00000063, 4, 7, 0x00000007 }},
+    { "umaal", 2, 6,       { 20, 27, 0x00000004, 4, 7, 0x00000009 }},
+    { "uxtab16", 2, 6,     { 20, 27, 0x0000006c, 4, 7, 0x00000007 }},
+    { "usubaddx", 2, 6,    { 20, 27, 0x00000065, 4, 7, 0x00000005 }},
+    { "usub8", 2, 6,       { 20, 27, 0x00000065, 4, 7, 0x0000000f }},
+    { "usub16", 2, 6,      { 20, 27, 0x00000065, 4, 7, 0x00000007 }},
+    { "usat16", 2, 6,      { 20, 27, 0x0000006e, 4, 7, 0x00000003 }},
+    { "usada8", 2, 6,      { 20, 27, 0x00000078, 4, 7, 0x00000001 }},
+    { "uqsubaddx", 2, 6,   { 20, 27, 0x00000066, 4, 7, 0x00000005 }},
+    { "uqsub8", 2, 6,      { 20, 27, 0x00000066, 4, 7, 0x0000000f }},
+    { "uqsub16", 2, 6,     { 20, 27, 0x00000066, 4, 7, 0x00000007 }},
+    { "uqaddsubx", 2, 6,   { 20, 27, 0x00000066, 4, 7, 0x00000003 }},
+    { "uqadd8", 2, 6,      { 20, 27, 0x00000066, 4, 7, 0x00000009 }},
+    { "uqadd16", 2, 6,     { 20, 27, 0x00000066, 4, 7, 0x00000001 }},
+    { "sxtab", 2, 6,       { 20, 27, 0x0000006a, 4, 7, 0x00000007 }},
+    { "uhsubaddx", 2, 6,   { 20, 27, 0x00000067, 4, 7, 0x00000005 }},
+    { "uhsub8", 2, 6,      { 20, 27, 0x00000067, 4, 7, 0x0000000f }},
+    { "uhsub16", 2, 6,     { 20, 27, 0x00000067, 4, 7, 0x00000007 }},
+    { "uhaddsubx", 2, 6,   { 20, 27, 0x00000067, 4, 7, 0x00000003 }},
+    { "uhadd8", 2, 6,      { 20, 27, 0x00000067, 4, 7, 0x00000009 }},
+    { "uhadd16", 2, 6,     { 20, 27, 0x00000067, 4, 7, 0x00000001 }},
+    { "uaddsubx", 2, 6,    { 20, 27, 0x00000065, 4, 7, 0x00000003 }},
+    { "uadd8", 2, 6,       { 20, 27, 0x00000065, 4, 7, 0x00000009 }},
+    { "uadd16", 2, 6,      { 20, 27, 0x00000065, 4, 7, 0x00000001 }},
+    { "sxtah", 2, 6,       { 20, 27, 0x0000006b, 4, 7, 0x00000007 }},
+    { "sxtab16", 2, 6,     { 20, 27, 0x00000068, 4, 7, 0x00000007 }},
+    { "qadd8", 2, 6,       { 20, 27, 0x00000062, 4, 7, 0x00000009 }},
+    { "bxj", 2, 5,         { 20, 27, 0x00000012, 4, 7, 0x00000002 }},
+    { "clz", 2, 3,         { 20, 27, 0x00000016, 4, 7, 0x00000001 }},
+    { "uxtah", 2, 6,       { 20, 27, 0x0000006f, 4, 7, 0x00000007 }},
+    { "bx", 2, 2,          { 20, 27, 0x00000012, 4, 7, 0x00000001 }},
+    { "rev", 2, 6,         { 20, 27, 0x0000006b, 4, 7, 0x00000003 }},
+    { "blx", 2, 3,         { 20, 27, 0x00000012, 4, 7, 0x00000003 }},
+    { "revsh", 2, 6,       { 20, 27, 0x0000006f, 4, 7, 0x0000000b }},
+    { "qadd", 2, 4,        { 20, 27, 0x00000010, 4, 7, 0x00000005 }},
+    { "qadd16", 2, 6,      { 20, 27, 0x00000062, 4, 7, 0x00000001 }},
+    { "qaddsubx", 2, 6,    { 20, 27, 0x00000062, 4, 7, 0x00000003 }},
+    { "ldrex", 2, 0,       { 20, 27, 0x00000019, 4, 7, 0x00000009 }},
+    { "qdadd", 2, 4,       { 20, 27, 0x00000014, 4, 7, 0x00000005 }},
+    { "qdsub", 2, 4,       { 20, 27, 0x00000016, 4, 7, 0x00000005 }},
+    { "qsub", 2, 4,        { 20, 27, 0x00000012, 4, 7, 0x00000005 }},
+    { "ldrexb", 2, 7,      { 20, 27, 0x0000001d, 4, 7, 0x00000009 }},
+    { "qsub8", 2, 6,       { 20, 27, 0x00000062, 4, 7, 0x0000000f }},
+    { "qsub16", 2, 6,      { 20, 27, 0x00000062, 4, 7, 0x00000007 }},
+    { "smuad", 4, 6,       { 20, 27, 0x00000070, 12, 15, 0x0000000f, 6, 7, 0x00000000, 4, 4, 0x00000001 }},
+    { "smmul", 4, 6,       { 20, 27, 0x00000075, 12, 15, 0x0000000f, 6, 7, 0x00000000, 4, 4, 0x00000001 }},
+    { "smusd", 4, 6,       { 20, 27, 0x00000070, 12, 15, 0x0000000f, 6, 7, 0x00000001, 4, 4, 0x00000001 }},
+    { "smlsd", 3, 6,       { 20, 27, 0x00000070, 6, 7, 0x00000001, 4, 4, 0x00000001 }},
+    { "smlsld", 3, 6,      { 20, 27, 0x00000074, 6, 7, 0x00000001, 4, 4, 0x00000001 }},
+    { "smmla", 3, 6,       { 20, 27, 0x00000075, 6, 7, 0x00000000, 4, 4, 0x00000001 }},
+    { "smmls", 3, 6,       { 20, 27, 0x00000075, 6, 7, 0x00000003, 4, 4, 0x00000001 }},
+    { "smlald", 3, 6,      { 20, 27, 0x00000074, 6, 7, 0x00000000, 4, 4, 0x00000001 }},
+    { "smlad", 3, 6,       { 20, 27, 0x00000070, 6, 7, 0x00000000, 4, 4, 0x00000001 }},
+    { "smlaw", 3, 4,       { 20, 27, 0x00000012, 7, 7, 0x00000001, 4, 5, 0x00000000 }},
+    { "smulw", 3, 4,       { 20, 27, 0x00000012, 7, 7, 0x00000001, 4, 5, 0x00000002 }},
+    { "pkhtb", 2, 6,       { 20, 27, 0x00000068, 4, 6, 0x00000005 }},
+    { "pkhbt", 2, 6,       { 20, 27, 0x00000068, 4, 6, 0x00000001 }},
+    { "smul", 3, 4,        { 20, 27, 0x00000016, 7, 7, 0x00000001, 4, 4, 0x00000000 }},
+    { "smlalxy", 3, 4,     { 20, 27, 0x00000014, 7, 7, 0x00000001, 4, 4, 0x00000000 }},
+    { "smla", 3, 4,        { 20, 27, 0x00000010, 7, 7, 0x00000001, 4, 4, 0x00000000 }},
+    { "mcrr", 1, 6,        { 20, 27, 0x000000c4 }},
+    { "mrrc", 1, 6,        { 20, 27, 0x000000c5 }},
+    { "cmp", 2, 0,         { 26, 27, 0x00000000, 20, 24, 0x00000015 }},
+    { "tst", 2, 0,         { 26, 27, 0x00000000, 20, 24, 0x00000011 }},
+    { "teq", 2, 0,         { 26, 27, 0x00000000, 20, 24, 0x00000013 }},
+    { "cmn", 2, 0,         { 26, 27, 0x00000000, 20, 24, 0x00000017 }},
+    { "smull", 2, 0,       { 21, 27, 0x00000006, 4, 7, 0x00000009 }},
+    { "umull", 2, 0,       { 21, 27, 0x00000004, 4, 7, 0x00000009 }},
+    { "umlal", 2, 0,       { 21, 27, 0x00000005, 4, 7, 0x00000009 }},
+    { "smlal", 2, 0,       { 21, 27, 0x00000007, 4, 7, 0x00000009 }},
+    { "mul", 2, 0,         { 21, 27, 0x00000000, 4, 7, 0x00000009 }},
+    { "mla", 2, 0,         { 21, 27, 0x00000001, 4, 7, 0x00000009 }},
+    { "ssat", 2, 6,        { 21, 27, 0x00000035, 4, 5, 0x00000001 }},
+    { "usat", 2, 6,        { 21, 27, 0x00000037, 4, 5, 0x00000001 }},
+    { "mrs", 4, 0,         { 23, 27, 0x00000002, 20, 21, 0x00000000, 16, 19, 0x0000000f, 0, 11, 0x00000000 }},
+    { "msr", 3, 0,         { 23, 27, 0x00000002, 20, 21, 0x00000002, 4, 7, 0x00000000 }},
+    { "and", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x00000000 }},
+    { "bic", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x0000000e }},
+    { "ldm", 3, 0,         { 25, 27, 0x00000004, 20, 22, 0x00000005, 15, 15, 0x00000000 }},
+    { "eor", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x00000001 }},
+    { "add", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x00000004 }},
+    { "rsb", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x00000003 }},
+    { "rsc", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x00000007 }},
+    { "sbc", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x00000006 }},
+    { "adc", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x00000005 }},
+    { "sub", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x00000002 }},
+    { "orr", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x0000000c }},
+    { "mvn", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x0000000f }},
+    { "mov", 2, 0,         { 26, 27, 0x00000000, 21, 24, 0x0000000d }},
+    { "stm", 2, 0,         { 25, 27, 0x00000004, 20, 22, 0x00000004 }},
+    { "ldm", 4, 0,         { 25, 27, 0x00000004, 22, 22, 0x00000001, 20, 20, 0x00000001, 15, 15, 0x00000001 }},
+    { "ldrsh", 3, 2,       { 25, 27, 0x00000000, 20, 20, 0x00000001, 4, 7, 0x0000000f }},
+    { "stm", 3, 0,         { 25, 27, 0x00000004, 22, 22, 0x00000000, 20, 20, 0x00000000 }},
+    { "ldm", 3, 0,         { 25, 27, 0x00000004, 22, 22, 0x00000000, 20, 20, 0x00000001 }},
+    { "ldrsb", 3, 2,       { 25, 27, 0x00000000, 20, 20, 0x00000001, 4, 7, 0x0000000d }},
+    { "strd", 3, 4,        { 25, 27, 0x00000000, 20, 20, 0x00000000, 4, 7, 0x0000000f }},
+    { "ldrh", 3, 0,        { 25, 27, 0x00000000, 20, 20, 0x00000001, 4, 7, 0x0000000b }},
+    { "strh", 3, 0,        { 25, 27, 0x00000000, 20, 20, 0x00000000, 4, 7, 0x0000000b }},
+    { "ldrd", 3, 4,        { 25, 27, 0x00000000, 20, 20, 0x00000000, 4, 7, 0x0000000d }},
+    { "strt", 3, 0,        { 26, 27, 0x00000001, 24, 24, 0x00000000, 20, 22, 0x00000002 }},
+    { "strbt", 3, 0,       { 26, 27, 0x00000001, 24, 24, 0x00000000, 20, 22, 0x00000006 }},
+    { "ldrbt", 3, 0,       { 26, 27, 0x00000001, 24, 24, 0x00000000, 20, 22, 0x00000007 }},
+    { "ldrt", 3, 0,        { 26, 27, 0x00000001, 24, 24, 0x00000000, 20, 22, 0x00000003 }},
+    { "mrc", 3, 6,         { 24, 27, 0x0000000e, 20, 20, 0x00000001, 4, 4, 0x00000001 }},
+    { "mcr", 3, 0,         { 24, 27, 0x0000000e, 20, 20, 0x00000000, 4, 4, 0x00000001 }},
+    { "msr", 3, 0,         { 23, 27, 0x00000006, 20, 21, 0x00000002, 22, 22, 0x00000001 }},
+    { "msr", 4, 0,         { 23, 27, 0x00000006, 20, 21, 0x00000002, 22, 22, 0x00000000, 16, 19, 0x00000004 }},
+    { "msr", 5, 0,         { 23, 27, 0x00000006, 20, 21, 0x00000002, 22, 22, 0x00000000, 19, 19, 0x00000001, 16, 17, 0x00000000 }},
+    { "msr", 4, 0,         { 23, 27, 0x00000006, 20, 21, 0x00000002, 22, 22, 0x00000000, 16, 17, 0x00000001 }},
+    { "msr", 4, 0,         { 23, 27, 0x00000006, 20, 21, 0x00000002, 22, 22, 0x00000000, 17, 17, 0x00000001 }},
+    { "ldrb", 3, 0,        { 26, 27, 0x00000001, 22, 22, 0x00000001, 20, 20, 0x00000001 }},
+    { "strb", 3, 0,        { 26, 27, 0x00000001, 22, 22, 0x00000001, 20, 20, 0x00000000 }},
+    { "ldr", 4, 0,         { 28, 31, 0x0000000e, 26, 27, 0x00000001, 22, 22, 0x00000000, 20, 20, 0x00000001 }},
+    { "ldrcond", 3, 0,     { 26, 27, 0x00000001, 22, 22, 0x00000000, 20, 20, 0x00000001 }},
+    { "str", 3, 0,         { 26, 27, 0x00000001, 22, 22, 0x00000000, 20, 20, 0x00000000 }},
+    { "cdp", 2, 0,         { 24, 27, 0x0000000e, 4, 4, 0x00000000 }},
+    { "stc", 2, 0,         { 25, 27, 0x00000006, 20, 20, 0x00000000 }},
+    { "ldc", 2, 0,         { 25, 27, 0x00000006, 20, 20, 0x00000001 }},
+    { "ldrexd", 2, ARMV6K, { 20, 27, 0x0000001B, 4, 7, 0x00000009 }},
+    { "strexd", 2, ARMV6K, { 20, 27, 0x0000001A, 4, 7, 0x00000009 }},
+    { "ldrexh", 2, ARMV6K, { 20, 27, 0x0000001F, 4, 7, 0x00000009 }},
+    { "strexh", 2, ARMV6K, { 20, 27, 0x0000001E, 4, 7, 0x00000009 }},
+    { "nop", 5, ARMV6K,    { 23, 27, 0x00000006, 22, 22, 0x00000000, 20, 21, 0x00000002, 16, 19, 0x00000000, 0, 7, 0x00000000 }},
+    { "yield", 5, ARMV6K,  { 23, 27, 0x00000006, 22, 22, 0x00000000, 20, 21, 0x00000002, 16, 19, 0x00000000, 0, 7, 0x00000001 }},
+    { "wfe", 5, ARMV6K,    { 23, 27, 0x00000006, 22, 22, 0x00000000, 20, 21, 0x00000002, 16, 19, 0x00000000, 0, 7, 0x00000002 }},
+    { "wfi", 5, ARMV6K,    { 23, 27, 0x00000006, 22, 22, 0x00000000, 20, 21, 0x00000002, 16, 19, 0x00000000, 0, 7, 0x00000003 }},
+    { "sev", 5, ARMV6K,    { 23, 27, 0x00000006, 22, 22, 0x00000000, 20, 21, 0x00000002, 16, 19, 0x00000000, 0, 7, 0x00000004 }},
+    { "swi", 1, 0,         { 24, 27, 0x0000000f }},
+    { "bbl", 1, 0,         { 25, 27, 0x00000005 }},
+};
+
+const InstructionSetEncodingItem arm_exclusion_code[] = { // read by index in parallel with arm_instruction; a row with a non-zero count rejects the match when all of its triples also hold
+    { "vmla", 0, ARMVFP2,      { 0 }},
+    { "vmls", 0, ARMVFP2,      { 0 }},
+    { "vnmla", 0, ARMVFP2,     { 0 }},
+    { "vnmls", 0, ARMVFP2,     { 0 }},
+    { "vnmul", 0, ARMVFP2,     { 0 }},
+    { "vmul", 0, ARMVFP2,      { 0 }},
+    { "vadd", 0, ARMVFP2,      { 0 }},
+    { "vsub", 0, ARMVFP2,      { 0 }},
+    { "vdiv", 0, ARMVFP2,      { 0 }},
+    { "vmov(i)", 0, ARMVFP3,   { 0 }},
+    { "vmov(r)", 0, ARMVFP3,   { 0 }},
+    { "vabs", 0, ARMVFP2,      { 0 }},
+    { "vneg", 0, ARMVFP2,      { 0 }},
+    { "vsqrt", 0, ARMVFP2,     { 0 }},
+    { "vcmp", 0, ARMVFP2,      { 0 }},
+    { "vcmp2", 0, ARMVFP2,     { 0 }},
+    { "vcvt(bff)", 0, ARMVFP3, { 4, 4, 1 }}, // NOTE(review): arm_instruction lists vcvt(bds) at this index and vcvt(bff) at the next; the count is 0, so this { 4, 4, 1 } triple is never evaluated
+    { "vcvt(bds)", 0, ARMVFP2, { 0 }},
+    { "vcvt(bfi)", 0, ARMVFP2, { 0 }},
+    { "vmovbrs", 0, ARMVFP2,   { 0 }},
+    { "vmsr", 0, ARMVFP2,      { 0 }},
+    { "vmovbrc", 0, ARMVFP2,   { 0 }},
+    { "vmrs", 0, ARMVFP2,      { 0 }},
+    { "vmovbcr", 0, ARMVFP2,   { 0 }},
+    { "vmovbrrss", 0, ARMVFP2, { 0 }},
+    { "vmovbrrd", 0, ARMVFP2,  { 0 }},
+    { "vstr", 0, ARMVFP2,      { 0 }},
+    { "vpush", 0, ARMVFP2,     { 0 }},
+    { "vstm", 0, ARMVFP2,      { 0 }},
+    { "vpop", 0, ARMVFP2,      { 0 }},
+    { "vldr", 0, ARMVFP2,      { 0 }},
+    { "vldm", 0, ARMVFP2,      { 0 }},
+
+    { "srs", 0, 6,         { 0 }},
+    { "rfe", 0, 6,         { 0 }},
+    { "bkpt", 0, 3,        { 0 }},
+    { "blx", 0, 3,         { 0 }},
+    { "cps", 0, 6,         { 0 }},
+    { "pld", 0, 4,         { 0 }},
+    { "setend", 0, 6,      { 0 }},
+    { "clrex", 0, 6,       { 0 }},
+    { "rev16", 0, 6,       { 0 }},
+    { "usad8", 0, 6,       { 0 }},
+    { "sxtb", 0, 6,        { 0 }},
+    { "uxtb", 0, 6,        { 0 }},
+    { "sxth", 0, 6,        { 0 }},
+    { "sxtb16", 0, 6,      { 0 }},
+    { "uxth", 0, 6,        { 0 }},
+    { "uxtb16", 0, 6,      { 0 }},
+    { "cpy", 0, 6,         { 0 }},
+    { "uxtab", 0, 6,       { 0 }},
+    { "ssub8", 0, 6,       { 0 }},
+    { "shsub8", 0, 6,      { 0 }},
+    { "ssubaddx", 0, 6,    { 0 }},
+    { "strex", 0, 6,       { 0 }},
+    { "strexb", 0, 7,      { 0 }},
+    { "swp", 0, 0,         { 0 }},
+    { "swpb", 0, 0,        { 0 }},
+    { "ssub16", 0, 6,      { 0 }},
+    { "ssat16", 0, 6,      { 0 }},
+    { "shsubaddx", 0, 6,   { 0 }},
+    { "qsubaddx", 0, 6,    { 0 }},
+    { "shaddsubx", 0, 6,   { 0 }},
+    { "shadd8", 0, 6,      { 0 }},
+    { "shadd16", 0, 6,     { 0 }},
+    { "sel", 0, 6,         { 0 }},
+    { "saddsubx", 0, 6,    { 0 }},
+    { "sadd8", 0, 6,       { 0 }},
+    { "sadd16", 0, 6,      { 0 }},
+    { "shsub16", 0, 6,     { 0 }},
+    { "umaal", 0, 6,       { 0 }},
+    { "uxtab16", 0, 6,     { 0 }},
+    { "usubaddx", 0, 6,    { 0 }},
+    { "usub8", 0, 6,       { 0 }},
+    { "usub16", 0, 6,      { 0 }},
+    { "usat16", 0, 6,      { 0 }},
+    { "usada8", 0, 6,      { 0 }},
+    { "uqsubaddx", 0, 6,   { 0 }},
+    { "uqsub8", 0, 6,      { 0 }},
+    { "uqsub16", 0, 6,     { 0 }},
+    { "uqaddsubx", 0, 6,   { 0 }},
+    { "uqadd8", 0, 6,      { 0 }},
+    { "uqadd16", 0, 6,     { 0 }},
+    { "sxtab", 0, 6,       { 0 }},
+    { "uhsubaddx", 0, 6,   { 0 }},
+    { "uhsub8", 0, 6,      { 0 }},
+    { "uhsub16", 0, 6,     { 0 }},
+    { "uhaddsubx", 0, 6,   { 0 }},
+    { "uhadd8", 0, 6,      { 0 }},
+    { "uhadd16", 0, 6,     { 0 }},
+    { "uaddsubx", 0, 6,    { 0 }},
+    { "uadd8", 0, 6,       { 0 }},
+    { "uadd16", 0, 6,      { 0 }},
+    { "sxtah", 0, 6,       { 0 }},
+    { "sxtab16", 0, 6,     { 0 }},
+    { "qadd8", 0, 6,       { 0 }},
+    { "bxj", 0, 5,         { 0 }},
+    { "clz", 0, 3,         { 0 }},
+    { "uxtah", 0, 6,       { 0 }},
+    { "bx", 0, 2,          { 0 }},
+    { "rev", 0, 6,         { 0 }},
+    { "blx", 0, 3,         { 0 }},
+    { "revsh", 0, 6,       { 0 }},
+    { "qadd", 0, 4,        { 0 }},
+    { "qadd16", 0, 6,      { 0 }},
+    { "qaddsubx", 0, 6,    { 0 }},
+    { "ldrex", 0, 0,       { 0 }},
+    { "qdadd", 0, 4,       { 0 }},
+    { "qdsub", 0, 4,       { 0 }},
+    { "qsub", 0, 4,        { 0 }},
+    { "ldrexb", 0, 7,      { 0 }},
+    { "qsub8", 0, 6,       { 0 }},
+    { "qsub16", 0, 6,      { 0 }},
+    { "smuad", 0, 6,       { 0 }},
+    { "smmul", 0, 6,       { 0 }},
+    { "smusd", 0, 6,       { 0 }},
+    { "smlsd", 0, 6,       { 0 }},
+    { "smlsld", 0, 6,      { 0 }},
+    { "smmla", 0, 6,       { 0 }},
+    { "smmls", 0, 6,       { 0 }},
+    { "smlald", 0, 6,      { 0 }},
+    { "smlad", 0, 6,       { 0 }},
+    { "smlaw", 0, 4,       { 0 }},
+    { "smulw", 0, 4,       { 0 }},
+    { "pkhtb", 0, 6,       { 0 }},
+    { "pkhbt", 0, 6,       { 0 }},
+    { "smul", 0, 4,        { 0 }},
+    { "smlal", 0, 4,       { 0 }}, // NOTE(review): the arm_instruction row at this index is named "smlalxy"
+    { "smla", 0, 4,        { 0 }},
+    { "mcrr", 0, 6,        { 0 }},
+    { "mrrc", 0, 6,        { 0 }},
+    { "cmp", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "tst", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "teq", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "cmn", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "smull", 0, 0,       { 0 }},
+    { "umull", 0, 0,       { 0 }},
+    { "umlal", 0, 0,       { 0 }},
+    { "smlal", 0, 0,       { 0 }},
+    { "mul", 0, 0,         { 0 }},
+    { "mla", 0, 0,         { 0 }},
+    { "ssat", 0, 6,        { 0 }},
+    { "usat", 0, 6,        { 0 }},
+    { "mrs", 0, 0,         { 0 }},
+    { "msr", 0, 0,         { 0 }},
+    { "and", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "bic", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "ldm", 0, 0,         { 0 }},
+    { "eor", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "add", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "rsb", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "rsc", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "sbc", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "adc", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "sub", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "orr", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "mvn", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "mov", 3, 0,         { 4, 4, 0x00000001, 7, 7, 0x00000001, 25, 25, 0x00000000 }},
+    { "stm", 0, 0,         { 0 }},
+    { "ldm", 0, 0,         { 0 }},
+    { "ldrsh", 0, 2,       { 0 }},
+    { "stm", 0, 0,         { 0 }},
+    { "ldm", 0, 0,         { 0 }},
+    { "ldrsb", 0, 2,       { 0 }},
+    { "strd", 0, 4,        { 0 }},
+    { "ldrh", 0, 0,        { 0 }},
+    { "strh", 0, 0,        { 0 }},
+    { "ldrd", 0, 4,        { 0 }},
+    { "strt", 0, 0,        { 0 }},
+    { "strbt", 0, 0,       { 0 }},
+    { "ldrbt", 0, 0,       { 0 }},
+    { "ldrt", 0, 0,        { 0 }},
+    { "mrc", 0, 6,         { 0 }},
+    { "mcr", 0, 0,         { 0 }},
+    { "msr", 0, 0,         { 0 }},
+    { "msr", 0, 0,         { 0 }},
+    { "msr", 0, 0,         { 0 }},
+    { "msr", 0, 0,         { 0 }},
+    { "msr", 0, 0,         { 0 }},
+    { "ldrb", 0, 0,        { 0 }},
+    { "strb", 0, 0,        { 0 }},
+    { "ldr", 0, 0,         { 0 }},
+    { "ldrcond", 1, 0,     { 28, 31, 0x0000000e }},
+    { "str", 0, 0,         { 0 }},
+    { "cdp", 0, 0,         { 0 }},
+    { "stc", 0, 0,         { 0 }},
+    { "ldc", 0, 0,         { 0 }},
+    { "ldrexd", 0, ARMV6K, { 0 }},
+    { "strexd", 0, ARMV6K, { 0 }},
+    { "ldrexh", 0, ARMV6K, { 0 }},
+    { "strexh", 0, ARMV6K, { 0 }},
+    { "nop", 0, ARMV6K,    { 0 }},
+    { "yield", 0, ARMV6K,  { 0 }},
+    { "wfe", 0, ARMV6K,    { 0 }},
+    { "wfi", 0, ARMV6K,    { 0 }},
+    { "sev", 0, ARMV6K,    { 0 }},
+    { "swi", 0, 0,         { 0 }},
+    { "bbl", 0, 0,         { 0 }},
+
+    { "bl_1_thumb", 0, INVALID,  { 0 }}, // Should be table[-4]
+    { "bl_2_thumb", 0, INVALID,  { 0 }}, // Should be located at the end of the table[-3]
+    { "blx_1_thumb", 0, INVALID, { 0 }}, // Should be located at table[-2]
+    { "invalid", 0, INVALID,     { 0 }}
+};
+
+// Decodes a 32-bit ARM instruction word against arm_instruction.
+//
+// instr: raw instruction word to classify.
+// idx:   receives the index of the first matching table entry; it is left
+//        untouched when nothing matches.
+//
+// Returns ARMDecodeStatus::SUCCESS on a match, ARMDecodeStatus::FAILURE otherwise.
+ARMDecodeStatus DecodeARMInstruction(u32 instr, s32* idx) {
+    // Entries are tried in table order, so the first encoding that matches wins.
+    const int entry_count = sizeof(arm_instruction) / sizeof(arm_instruction[0]);
+
+    for (int entry = 0; entry < entry_count; ++entry) {
+        // Every condition is a {first bit, last bit, expected value} triple.
+        const u32* field = arm_instruction[entry].content;
+        int pending = arm_instruction[entry].attribute_value;
+
+        for (; pending != 0; --pending, field += 3) {
+            if (field[0] == 0 && field[1] == 31) {
+                // Pattern covering bits 0..31 (clrex): compare the raw word.
+                if (instr != field[2])
+                    break;
+            } else if (BITS(instr, field[0], field[1]) != field[2]) {
+                break;
+            }
+        }
+
+        // A required bit field differs, so move on to the next encoding.
+        if (pending != 0)
+            continue;
+
+        // The entry at the same index in arm_exclusion_code can veto the match,
+        // but only when it lists conditions and every one of them holds.
+        const u32* veto = arm_exclusion_code[entry].content;
+        int veto_left = arm_exclusion_code[entry].attribute_value;
+        const bool has_veto = veto_left != 0;
+
+        for (; veto_left != 0; --veto_left, veto += 3) {
+            if (BITS(instr, veto[0], veto[1]) != veto[2])
+                break;
+        }
+
+        if (has_veto && veto_left == 0)
+            continue;
+
+        // Matched and not vetoed: report the table index.
+        *idx = entry;
+        return ARMDecodeStatus::SUCCESS;
+    }
+
+    // No encoding in the table accepted the word.
+    return ARMDecodeStatus::FAILURE;
+}
diff --git a/tests/skyeye_interpreter/dyncom/arm_dyncom_dec.h b/tests/skyeye_interpreter/dyncom/arm_dyncom_dec.h
new file mode 100644
index 00000000..d7170e0f
--- /dev/null
+++ b/tests/skyeye_interpreter/dyncom/arm_dyncom_dec.h
@@ -0,0 +1,39 @@
+// Copyright 2012 Michael Kang, 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+enum class ARMDecodeStatus { // result type of DecodeARMInstruction
+    SUCCESS, // an entry of the instruction table matched
+    FAILURE  // no entry matched
+};
+
+ARMDecodeStatus DecodeARMInstruction(u32 instr, s32* idx); // looks instr up in arm_instruction; on SUCCESS *idx holds the matching index, on FAILURE *idx is not written
+
+struct InstructionSetEncodingItem { // one row of an instruction encoding table
+    const char *name;    // mnemonic label for the row
+    int attribute_value; // how many {first bit, last bit, expected value} triples of content are in use
+    int version;         // version tag (see the enum below); the tables also store bare integers here
+    u32 content[21];     // the triples, stored back to back (room for 7)
+};
+
+// ARM versions
+enum { // values for InstructionSetEncodingItem::version, numbered consecutively from 0
+    INVALID = 0, // NOTE(review): table rows that write a bare 0 in the version column carry this same value
+    ARMALL,
+    ARMV4,
+    ARMV4T,
+    ARMV5T,
+    ARMV5TE,
+    ARMV5TEJ,
+    ARMV6,
+    ARM1176JZF_S,
+    ARMVFP2,
+    ARMVFP3,
+    ARMV6K,
+};
+
+extern const InstructionSetEncodingItem arm_instruction[]; // encoding table defined in arm_dyncom_dec.cpp; indices returned by DecodeARMInstruction refer to it
diff --git a/tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.cpp b/tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.cpp
new file mode 100644
index 00000000..b8732f92
--- /dev/null
+++ b/tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.cpp
@@ -0,0 +1,6876 @@
+// Copyright 2012 Michael Kang, 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#define CITRA_IGNORE_EXIT(x)
+
+#include <algorithm>
+#include <cstdio>
+
+#include "common/common_types.h"
+#include "common/logging/log.h"
+
+#include "tests/skyeye_interpreter/dyncom/arm_dyncom_dec.h"
+#include "tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.h"
+#include "tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.h"
+#include "tests/skyeye_interpreter/dyncom/arm_dyncom_run.h"
+#include "tests/skyeye_interpreter/skyeye_common/armstate.h"
+#include "tests/skyeye_interpreter/skyeye_common/armsupp.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfp.h"
+
+enum { // one bit per flag; the translate routines in this file store NON_BRANCH or INDIRECT_BRANCH in arm_inst::br
+    COND            = (1 << 0),
+    NON_BRANCH      = (1 << 1),
+    DIRECT_BRANCH   = (1 << 2),
+    INDIRECT_BRANCH = (1 << 3),
+    CALL            = (1 << 4),
+    RET             = (1 << 5),
+    END_OF_PAGE     = (1 << 6),
+    THUMB           = (1 << 7),
+    SINGLE_STEP     = (1 << 8)
+};
+
+#define RM    BITS(sht_oper, 0, 3)  // expands against a local named sht_oper at the point of use
+#define RS    BITS(sht_oper, 8, 11) // likewise requires sht_oper in scope
+
+#define glue(x, y)            x ## y
+#define DPO(s)                glue(DataProcessingOperands, s)
+#define ROTATE_RIGHT(n, i, l) ((i) ? (((n) << ((l) - (i))) | ((n) >> (i))) : (n)) // i == 0 returns n as-is: a shift by the full width l is undefined; arguments are parenthesised but evaluated more than once
+#define ROTATE_LEFT(n, i, l)  ((i) ? (((n) >> ((l) - (i))) | ((n) << (i))) : (n)) // mirror image of ROTATE_RIGHT, same i == 0 handling
+#define ROTATE_RIGHT_32(n, i) ROTATE_RIGHT(n, i, 32)
+#define ROTATE_LEFT_32(n, i)  ROTATE_LEFT(n, i, 32)
+
+typedef unsigned int (*shtop_fp_t)(ARMul_State* cpu, unsigned int sht_oper); // signature shared by the DPO(...) shifter-operand helpers; get_shtop returns one of them
+
+static bool CondPassed(const ARMul_State* cpu, unsigned int cond) { // true when the N/Z/C/V flags held in cpu satisfy condition code cond
+    const bool n_flag = cpu->NFlag != 0; // each flag is reduced to bool: any non-zero stored value counts as set
+    const bool z_flag = cpu->ZFlag != 0;
+    const bool c_flag = cpu->CFlag != 0;
+    const bool v_flag = cpu->VFlag != 0;
+
+    switch (cond) {
+    case ConditionCode::EQ:
+        return z_flag;
+    case ConditionCode::NE:
+        return !z_flag;
+    case ConditionCode::CS:
+        return c_flag;
+    case ConditionCode::CC:
+        return !c_flag;
+    case ConditionCode::MI:
+        return n_flag;
+    case ConditionCode::PL:
+        return !n_flag;
+    case ConditionCode::VS:
+        return v_flag;
+    case ConditionCode::VC:
+        return !v_flag;
+    case ConditionCode::HI:
+        return (c_flag && !z_flag);
+    case ConditionCode::LS:
+        return (!c_flag || z_flag);
+    case ConditionCode::GE:
+        return (n_flag == v_flag);
+    case ConditionCode::LT:
+        return (n_flag != v_flag);
+    case ConditionCode::GT:
+        return (!z_flag && (n_flag == v_flag));
+    case ConditionCode::LE:
+        return (z_flag || (n_flag != v_flag));
+    case ConditionCode::AL:
+    case ConditionCode::NV: // Unconditional
+        return true;
+    }
+
+    return false; // reached only when cond matches none of the cases above
+}
+
+static unsigned int DPO(Immediate)(ARMul_State* cpu, unsigned int sht_oper) {
+    // Immediate operand: BITS(sht_oper, 0, 7) rotated right by twice BITS(sht_oper, 8, 11).
+    const unsigned int imm8 = BITS(sht_oper, 0, 7);
+    const unsigned int rotate_field = BITS(sht_oper, 8, 11);
+    const unsigned int result = ROTATE_RIGHT_32(imm8, rotate_field * 2);
+
+    // A zero rotate field passes the C flag through; otherwise the carry-out is bit 31 of the result.
+    cpu->shifter_carry_out = (rotate_field != 0) ? BIT(result, 31) : cpu->CFlag;
+    return result;
+}
+
+static unsigned int DPO(Register)(ARMul_State* cpu, unsigned int sht_oper) {
+    // Unshifted register operand: the value is returned as read and the carry-out is the current C flag.
+    const unsigned int value = CHECK_READ_REG15(cpu, RM);
+    cpu->shifter_carry_out = cpu->CFlag;
+    return value;
+}
+
+static unsigned int DPO(LogicalShiftLeftByImmediate)(ARMul_State* cpu, unsigned int sht_oper) { // register RM shifted left by BITS(sht_oper, 7, 11)
+    int shift_imm = BITS(sht_oper, 7, 11);
+    unsigned int rm = CHECK_READ_REG15(cpu, RM);
+    unsigned int shifter_operand;
+    if (shift_imm == 0) {
+        shifter_operand = rm;
+        cpu->shifter_carry_out = cpu->CFlag; // no shift: the carry-out is the existing C flag
+    } else {
+        shifter_operand = rm << shift_imm;
+        cpu->shifter_carry_out = BIT(rm, 32 - shift_imm); // the last bit pushed out of the top by the shift
+    }
+    return shifter_operand;
+}
+
+static unsigned int DPO(LogicalShiftLeftByRegister)(ARMul_State* cpu, unsigned int sht_oper) { // register RM shifted left by the low byte of register RS
+    unsigned int shifter_operand; // unsigned like the return type and the sibling helpers (was int, which round-tripped values above INT_MAX through a signed type)
+    unsigned int rm = CHECK_READ_REG15(cpu, RM);
+    unsigned int rs = CHECK_READ_REG15(cpu, RS);
+    if (BITS(rs, 0, 7) == 0) {
+        shifter_operand = rm;
+        cpu->shifter_carry_out = cpu->CFlag; // zero shift: value and carry pass through
+    } else if (BITS(rs, 0, 7) < 32) {
+        shifter_operand = rm << BITS(rs, 0, 7);
+        cpu->shifter_carry_out = BIT(rm, 32 - BITS(rs, 0, 7)); // the last bit shifted out of the top
+    } else if (BITS(rs, 0, 7) == 32) {
+        shifter_operand = 0;
+        cpu->shifter_carry_out = BIT(rm, 0); // a shift by exactly 32 pushes bit 0 out last
+    } else {
+        shifter_operand = 0;
+        cpu->shifter_carry_out = 0; // shifts above 32 clear both the result and the carry-out
+    }
+    return shifter_operand;
+}
+
+static unsigned int DPO(LogicalShiftRightByImmediate)(ARMul_State* cpu, unsigned int sht_oper) { // register RM shifted right (zero fill) by BITS(sht_oper, 7, 11)
+    unsigned int rm = CHECK_READ_REG15(cpu, RM);
+    unsigned int shifter_operand;
+    int shift_imm = BITS(sht_oper, 7, 11);
+    if (shift_imm == 0) {
+        shifter_operand = 0; // a zero field is handled like a shift by 32: empty result ...
+        cpu->shifter_carry_out = BIT(rm, 31); // ... with bit 31 as the last bit shifted out
+    } else {
+        shifter_operand = rm >> shift_imm;
+        cpu->shifter_carry_out = BIT(rm, shift_imm - 1); // the last bit shifted out of the bottom
+    }
+    return shifter_operand;
+}
+
+static unsigned int DPO(LogicalShiftRightByRegister)(ARMul_State* cpu, unsigned int sht_oper) { // register RM shifted right (zero fill) by the low byte of register RS
+    unsigned int rs = CHECK_READ_REG15(cpu, RS);
+    unsigned int rm = CHECK_READ_REG15(cpu, RM);
+    unsigned int shifter_operand;
+    if (BITS(rs, 0, 7) == 0) {
+        shifter_operand = rm;
+        cpu->shifter_carry_out = cpu->CFlag; // zero shift: value and carry pass through
+    } else if (BITS(rs, 0, 7) < 32) {
+        shifter_operand = rm >> BITS(rs, 0, 7);
+        cpu->shifter_carry_out = BIT(rm, BITS(rs, 0, 7) - 1); // the last bit shifted out of the bottom
+    } else if (BITS(rs, 0, 7) == 32) {
+        shifter_operand = 0;
+        cpu->shifter_carry_out = BIT(rm, 31); // a shift by exactly 32 pushes bit 31 out last
+    } else {
+        shifter_operand = 0;
+        cpu->shifter_carry_out = 0; // shifts above 32 clear both the result and the carry-out
+    }
+    return shifter_operand;
+}
+
+static unsigned int DPO(ArithmeticShiftRightByImmediate)(ARMul_State* cpu, unsigned int sht_oper) { // register RM shifted right with sign fill by BITS(sht_oper, 7, 11)
+    unsigned int rm = CHECK_READ_REG15(cpu, RM);
+    unsigned int shifter_operand;
+    int shift_imm = BITS(sht_oper, 7, 11);
+    if (shift_imm == 0) { // a zero field yields a result made entirely of copies of bit 31
+        if (BIT(rm, 31) == 0)
+            shifter_operand = 0;
+        else
+            shifter_operand = 0xFFFFFFFF;
+        cpu->shifter_carry_out = BIT(rm, 31);
+    } else {
+        shifter_operand = static_cast<int>(rm) >> shift_imm; // the cast makes >> operate on a signed value
+        cpu->shifter_carry_out = BIT(rm, shift_imm - 1); // the last bit shifted out of the bottom
+    }
+    return shifter_operand;
+}
+
+static unsigned int DPO(ArithmeticShiftRightByRegister)(ARMul_State* cpu, unsigned int sht_oper) { // register RM shifted right with sign fill by the low byte of register RS
+    unsigned int rs = CHECK_READ_REG15(cpu, RS);
+    unsigned int rm = CHECK_READ_REG15(cpu, RM);
+    unsigned int shifter_operand;
+    if (BITS(rs, 0, 7) == 0) {
+        shifter_operand = rm;
+        cpu->shifter_carry_out = cpu->CFlag; // zero shift: value and carry pass through
+    } else if (BITS(rs, 0, 7) < 32) {
+        shifter_operand = static_cast<int>(rm) >> BITS(rs, 0, 7); // the cast makes >> operate on a signed value
+        cpu->shifter_carry_out = BIT(rm, BITS(rs, 0, 7) - 1); // the last bit shifted out of the bottom
+    } else { // 32 or more: the result is made entirely of copies of bit 31
+        if (BIT(rm, 31) == 0)
+            shifter_operand = 0;
+        else
+            shifter_operand = 0xffffffff;
+        cpu->shifter_carry_out = BIT(rm, 31);
+    }
+    return shifter_operand;
+}
+
+static unsigned int DPO(RotateRightByImmediate)(ARMul_State* cpu, unsigned int sht_oper) { // register RM rotated right by BITS(sht_oper, 7, 11)
+    unsigned int shifter_operand;
+    unsigned int rm = CHECK_READ_REG15(cpu, RM);
+    int shift_imm = BITS(sht_oper, 7, 11);
+    if (shift_imm == 0) {
+        shifter_operand = (cpu->CFlag << 31) | (rm >> 1); // zero field: shift right by one with the C flag entering at bit 31
+        cpu->shifter_carry_out = BIT(rm, 0); // bit 0 is the bit that falls out
+    } else {
+        shifter_operand = ROTATE_RIGHT_32(rm, shift_imm);
+        cpu->shifter_carry_out = BIT(rm, shift_imm - 1); // the last bit rotated round to the top
+    }
+    return shifter_operand;
+}
+
+static unsigned int DPO(RotateRightByRegister)(ARMul_State* cpu, unsigned int sht_oper) { // register RM rotated right by an amount taken from register RS
+    unsigned int rm = CHECK_READ_REG15(cpu, RM);
+    unsigned int rs = CHECK_READ_REG15(cpu, RS);
+    unsigned int shifter_operand;
+    if (BITS(rs, 0, 7) == 0) {
+        shifter_operand = rm;
+        cpu->shifter_carry_out = cpu->CFlag; // zero amount: value and carry pass through
+    } else if (BITS(rs, 0, 4) == 0) { // non-zero low byte whose low five bits are zero
+        shifter_operand = rm;
+        cpu->shifter_carry_out = BIT(rm, 31); // value unchanged, carry-out taken from bit 31
+    } else {
+        shifter_operand = ROTATE_RIGHT_32(rm, BITS(rs, 0, 4)); // only the low five bits select the rotation
+        cpu->shifter_carry_out = BIT(rm, BITS(rs, 0, 4) - 1);
+    }
+    return shifter_operand;
+}
+
+typedef void (*get_addr_fp_t)(ARMul_State *cpu, unsigned int inst, unsigned int &virt_addr); // signature of the address helpers below; the computed address is returned through virt_addr
+
+struct ldst_inst { // pairs an instruction word with the address helper to use for it
+    unsigned int inst;
+    get_addr_fp_t get_addr;
+};
+#define DEBUG_MSG LOG_DEBUG(Core_ARM11, "inst is %x", inst); CITRA_IGNORE_EXIT(0) // NOTE(review): expands to two statements, so it is not safe as the sole body of an unbraced if
+
+#define LnSWoUB(s)   glue(LnSWoUB, s) // name-pasting helpers: LnSWoUB(X) names the function LnSWoUBX, and so on
+#define MLnS(s)      glue(MLnS, s)
+#define LdnStM(s)    glue(LdnStM, s)
+
+#define W_BIT        BIT(inst, 21) // these five expand against a local named inst at the point of use
+#define U_BIT        BIT(inst, 23)
+#define I_BIT        BIT(inst, 25)
+#define P_BIT        BIT(inst, 24)
+#define OFFSET_12    BITS(inst, 0, 11)
+
+static void LnSWoUB(ImmediateOffset)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) {
+    const unsigned int base_reg = BITS(inst, 16, 19);
+    const unsigned int base = CHECK_READ_REG15_WA(cpu, base_reg);
+    const unsigned int offset = OFFSET_12;
+
+    // Offset addressing: U_BIT picks add or subtract; the base register itself is not written.
+    if (U_BIT)
+        virt_addr = base + offset;
+    else
+        virt_addr = base - offset;
+}
+
+static void LnSWoUB(RegisterOffset)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn plus or minus Rm; no register is written
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst, 0, 3);
+    unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm);
+    unsigned int addr;
+
+    if (U_BIT) // U_BIT set: add the offset register, otherwise subtract it
+        addr = rn + rm;
+    else
+        addr = rn - rm;
+
+    virt_addr = addr;
+}
+
+static void LnSWoUB(ImmediatePostIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn as read; Rn is then moved by OFFSET_12
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int addr = CHECK_READ_REG15_WA(cpu, Rn); // captured before the write-back below
+    // NOTE(review): unlike the other write-back helpers in this file, Rn is updated without consulting CondPassed - confirm callers only invoke this once the condition holds.
+    if (U_BIT)
+        cpu->Reg[Rn] += OFFSET_12;
+    else
+        cpu->Reg[Rn] -= OFFSET_12;
+
+    virt_addr = addr;
+}
+
+static void LnSWoUB(ImmediatePreIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn plus or minus OFFSET_12, also written back to Rn
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int addr;
+
+    if (U_BIT)
+        addr = CHECK_READ_REG15_WA(cpu, Rn) + OFFSET_12;
+    else
+        addr = CHECK_READ_REG15_WA(cpu, Rn) - OFFSET_12;
+
+    virt_addr = addr;
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) // write-back only when the instruction's condition holds
+        cpu->Reg[Rn] = addr;
+}
+
+static void MLnS(RegisterPreIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn plus or minus Rm, also written back to Rn
+    unsigned int addr;
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst,  0,  3);
+    unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm);
+
+    if (U_BIT)
+        addr = rn + rm;
+    else
+        addr = rn - rm;
+
+    virt_addr = addr;
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) // write-back only when the instruction's condition holds
+        cpu->Reg[Rn] = addr;
+}
+
+static void LnSWoUB(RegisterPreIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn plus or minus Rm, also written back to Rn
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst, 0, 3);
+    unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm);
+    unsigned int addr;
+
+    if (U_BIT)
+        addr = rn + rm;
+    else
+        addr = rn - rm;
+
+    virt_addr = addr;
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) { // write-back only when the instruction's condition holds
+        cpu->Reg[Rn] = addr;
+    }
+}
+
+static void LnSWoUB(ScaledRegisterPreIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn plus or minus a shifted Rm, also written back to Rn
+    unsigned int shift = BITS(inst, 5, 6); // selects one of the four cases in the switch below
+    unsigned int shift_imm = BITS(inst, 7, 11);
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst, 0, 3);
+    unsigned int index = 0;
+    unsigned int addr;
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm);
+    unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
+
+    switch (shift) {
+    case 0: // left shift
+        index = rm << shift_imm;
+        break;
+    case 1: // right shift with zero fill; a zero amount produces 0
+        if (shift_imm == 0) {
+            index = 0;
+        } else {
+            index = rm >> shift_imm;
+        }
+        break;
+    case 2: // right shift with sign fill
+        if (shift_imm == 0) { // ASR #32
+            if (BIT(rm, 31) == 1)
+                index = 0xFFFFFFFF;
+            else
+                index = 0;
+        } else {
+            index = static_cast<int>(rm) >> shift_imm;
+        }
+        break;
+    case 3: // rotate right; a zero amount shifts by one with the C flag entering at bit 31
+        if (shift_imm == 0) {
+            index = (cpu->CFlag << 31) | (rm >> 1);
+        } else {
+            index = ROTATE_RIGHT_32(rm, shift_imm);
+        }
+        break;
+    }
+
+    if (U_BIT)
+        addr = rn + index;
+    else
+        addr = rn - index;
+
+    virt_addr = addr;
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) // write-back only when the instruction's condition holds
+        cpu->Reg[Rn] = addr;
+}
+
+static void LnSWoUB(ScaledRegisterPostIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn as read; Rn is then moved by a shifted Rm
+    unsigned int shift = BITS(inst, 5, 6); // selects one of the four cases in the switch below
+    unsigned int shift_imm = BITS(inst, 7, 11);
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst, 0, 3);
+    unsigned int index = 0;
+    unsigned int addr = CHECK_READ_REG15_WA(cpu, Rn);
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm);
+
+    switch (shift) {
+    case 0: // left shift
+        index = rm << shift_imm;
+        break;
+    case 1: // right shift with zero fill; a zero amount produces 0
+        if (shift_imm == 0) {
+            index = 0;
+        } else {
+            index = rm >> shift_imm;
+        }
+        break;
+    case 2: // right shift with sign fill
+        if (shift_imm == 0) { // ASR #32
+            if (BIT(rm, 31) == 1)
+                index = 0xFFFFFFFF;
+            else
+                index = 0;
+        } else {
+            index = static_cast<int>(rm) >> shift_imm;
+        }
+        break;
+    case 3: // rotate right; a zero amount shifts by one with the C flag entering at bit 31
+        if (shift_imm == 0) {
+            index = (cpu->CFlag << 31) | (rm >> 1);
+        } else {
+            index = ROTATE_RIGHT_32(rm, shift_imm);
+        }
+        break;
+    }
+
+    virt_addr = addr; // the unmodified base is the access address
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) { // base update only when the instruction's condition holds
+        if (U_BIT)
+            cpu->Reg[Rn] += index;
+        else
+            cpu->Reg[Rn] -= index;
+    }
+}
+
+static void LnSWoUB(RegisterPostIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn as read; Rn is then moved by Rm
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst,  0,  3);
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm); // read before the base register is modified
+
+    virt_addr = CHECK_READ_REG15_WA(cpu, Rn);
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) { // base update only when the instruction's condition holds
+        if (U_BIT) {
+            cpu->Reg[Rn] += rm;
+        } else {
+            cpu->Reg[Rn] -= rm;
+        }
+    }
+}
+
+static void MLnS(ImmediateOffset)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) {
+    // The 8-bit offset is assembled from two nibbles: BITS(inst, 8, 11) on top of BITS(inst, 0, 3).
+    const unsigned int high_nibble = BITS(inst, 8, 11);
+    const unsigned int low_nibble = BITS(inst, 0, 3);
+    const unsigned int offset = (high_nibble << 4) | low_nibble;
+
+    const unsigned int base_reg = BITS(inst, 16, 19);
+    const unsigned int base = CHECK_READ_REG15_WA(cpu, base_reg);
+
+    // Offset addressing: only virt_addr is produced, the base register is not written.
+    if (U_BIT)
+        virt_addr = base + offset;
+    else
+        virt_addr = base - offset;
+}
+
+static void MLnS(RegisterOffset)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn plus or minus Rm; no register is written
+    unsigned int addr;
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst,  0,  3);
+    unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm);
+
+    if (U_BIT) // U_BIT set: add the offset register, otherwise subtract it
+        addr = rn + rm;
+    else
+        addr = rn - rm;
+
+    virt_addr = addr;
+}
+
+static void MLnS(ImmediatePreIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn plus or minus an 8-bit offset, also written back to Rn
+    unsigned int Rn     = BITS(inst, 16, 19);
+    unsigned int immedH = BITS(inst,  8, 11);
+    unsigned int immedL = BITS(inst,  0,  3);
+    unsigned int addr;
+    unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
+    unsigned int offset_8 = (immedH << 4) | immedL; // immedH forms the upper nibble, immedL the lower
+
+    if (U_BIT)
+        addr = rn + offset_8;
+    else
+        addr = rn - offset_8;
+
+    virt_addr = addr;
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) // write-back only when the instruction's condition holds
+        cpu->Reg[Rn] = addr;
+}
+
+static void MLnS(ImmediatePostIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn as read; Rn is then moved by an 8-bit offset
+    unsigned int Rn     = BITS(inst, 16, 19);
+    unsigned int immedH = BITS(inst,  8, 11);
+    unsigned int immedL = BITS(inst,  0,  3);
+    unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
+
+    virt_addr = rn; // the unmodified base is the access address
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) { // base update only when the instruction's condition holds
+        unsigned int offset_8 = (immedH << 4) | immedL; // immedH forms the upper nibble, immedL the lower
+        if (U_BIT)
+            rn += offset_8;
+        else
+            rn -= offset_8;
+
+        cpu->Reg[Rn] = rn;
+    }
+}
+
+static void MLnS(RegisterPostIndexed)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn as read; Rn is then moved by Rm
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst,  0,  3);
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm); // read before the base register is modified
+
+    virt_addr = CHECK_READ_REG15_WA(cpu, Rn);
+
+    if (CondPassed(cpu, BITS(inst, 28, 31))) { // base update only when the instruction's condition holds
+        if (U_BIT)
+            cpu->Reg[Rn] += rm;
+        else
+            cpu->Reg[Rn] -= rm;
+    }
+}
+
+static void LdnStM(DecrementBefore)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) {
+    // Count the bits set in the register list, BITS(inst, 0, 15).
+    int reg_total = 0;
+    for (unsigned int list = BITS(inst, 0, 15); list != 0; list >>= 1)
+        reg_total += (list & 1) ? 1 : 0;
+
+    const unsigned int base_reg = BITS(inst, 16, 19);
+    const int span = reg_total * 4;
+    // The reported address lies span bytes below the base.
+    virt_addr = CHECK_READ_REG15_WA(cpu, base_reg) - span;
+
+    // The base is lowered by span only when bit 21 is set and the condition holds.
+    if (BIT(inst, 21) && CondPassed(cpu, BITS(inst, 28, 31)))
+        cpu->Reg[base_reg] -= span;
+}
+
+static void LdnStM(IncrementBefore)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) {
+    // Count the bits set in the register list, BITS(inst, 0, 15).
+    int reg_total = 0;
+    for (unsigned int list = BITS(inst, 0, 15); list != 0; list >>= 1)
+        reg_total += (list & 1) ? 1 : 0;
+
+    const unsigned int base_reg = BITS(inst, 16, 19);
+
+    // The reported address is four bytes above the base.
+    virt_addr = CHECK_READ_REG15_WA(cpu, base_reg) + 4;
+
+    // The base is raised by four bytes per listed register only when bit 21 is set and the condition holds.
+    if (BIT(inst, 21) && CondPassed(cpu, BITS(inst, 28, 31)))
+        cpu->Reg[base_reg] += reg_total * 4;
+}
+
+static void LdnStM(IncrementAfter)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) {
+    // Count the bits set in the register list, BITS(inst, 0, 15).
+    int reg_total = 0;
+    for (unsigned int list = BITS(inst, 0, 15); list != 0; list >>= 1)
+        reg_total += (list & 1) ? 1 : 0;
+
+    const unsigned int base_reg = BITS(inst, 16, 19);
+
+    // The reported address is the base itself.
+    virt_addr = CHECK_READ_REG15_WA(cpu, base_reg);
+
+    // The base is raised by four bytes per listed register only when bit 21 is set and the condition holds.
+    if (BIT(inst, 21) && CondPassed(cpu, BITS(inst, 28, 31)))
+        cpu->Reg[base_reg] += reg_total * 4;
+}
+
+static void LdnStM(DecrementAfter)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) {
+    // Count the bits set in the register list, BITS(inst, 0, 15).
+    int reg_total = 0;
+    for (unsigned int list = BITS(inst, 0, 15); list != 0; list >>= 1)
+        reg_total += (list & 1) ? 1 : 0;
+
+    const unsigned int base_reg = BITS(inst, 16, 19);
+    const unsigned int base = CHECK_READ_REG15_WA(cpu, base_reg);
+    const int span = reg_total * 4;
+
+    // The reported address is placed so that the last of the counted words sits at the base.
+    virt_addr = base - span + 4;
+
+    // The base is lowered by span only when bit 21 is set and the condition holds.
+    if (BIT(inst, 21) && CondPassed(cpu, BITS(inst, 28, 31)))
+        cpu->Reg[base_reg] -= span;
+}
+
+static void LnSWoUB(ScaledRegisterOffset)(ARMul_State* cpu, unsigned int inst, unsigned int& virt_addr) { // address = Rn plus or minus a shifted Rm; no register is written
+    unsigned int shift = BITS(inst, 5, 6); // selects one of the four cases in the switch below
+    unsigned int shift_imm = BITS(inst, 7, 11);
+    unsigned int Rn = BITS(inst, 16, 19);
+    unsigned int Rm = BITS(inst, 0, 3);
+    unsigned int index = 0;
+    unsigned int addr;
+    unsigned int rm = CHECK_READ_REG15_WA(cpu, Rm);
+    unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
+
+    switch (shift) {
+    case 0: // left shift
+        index = rm << shift_imm;
+        break;
+    case 1: // right shift with zero fill; a zero amount produces 0
+        if (shift_imm == 0) {
+            index = 0;
+        } else {
+            index = rm >> shift_imm;
+        }
+        break;
+    case 2: // right shift with sign fill
+        if (shift_imm == 0) { // ASR #32
+            if (BIT(rm, 31) == 1)
+                index = 0xFFFFFFFF;
+            else
+                index = 0;
+        } else {
+            index = static_cast<int>(rm) >> shift_imm;
+        }
+        break;
+    case 3: // rotate right; a zero amount shifts by one with the C flag entering at bit 31
+        if (shift_imm == 0) {
+            index = (cpu->CFlag << 31) | (rm >> 1);
+        } else {
+            index = ROTATE_RIGHT_32(rm, shift_imm);
+        }
+        break;
+    }
+
+    if (U_BIT) { // U_BIT set: add the scaled index, otherwise subtract it
+        addr = rn + index;
+    } else
+        addr = rn - index;
+
+    virt_addr = addr;
+}
+
+struct arm_inst { // common header of a translated instruction record, filled in by the translate routines
+    unsigned int idx;  // the index argument handed to the translate routine
+    unsigned int cond; // BITS(inst, 28, 31) of the instruction word
+    int br;            // NON_BRANCH, or INDIRECT_BRANCH when Rd == 15 (as set by the adc/add translators)
+    __extension__ char component[0]; // zero-length tail: the per-instruction struct (e.g. adc_inst) is stored at this address
+};
+
+struct generic_arm_inst { // operand record; the code that fills it is outside this part of the file
+    u32 Ra;
+    u32 Rm;
+    u32 Rn;
+    u32 Rd;
+    u8 op1;
+    u8 op2;
+};
+
+struct adc_inst { // filled by the adc translate routine from the instruction word
+    unsigned int I;               // BIT(inst, 25)
+    unsigned int S;               // BIT(inst, 20)
+    unsigned int Rn;              // BITS(inst, 16, 19)
+    unsigned int Rd;              // BITS(inst, 12, 15)
+    unsigned int shifter_operand; // BITS(inst, 0, 11), later passed to shtop_func
+    shtop_fp_t shtop_func;        // helper chosen by get_shtop(inst)
+};
+
+struct add_inst { // same members as adc_inst, filled the same way by the add translate routine
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct orr_inst { // same members as adc_inst
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct and_inst { // same members as adc_inst
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct eor_inst { // same members as adc_inst
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct bbl_inst { // operand record; the code that fills it is outside this part of the file
+    unsigned int L;
+    int signed_immed_24; // the only signed member of this record
+    unsigned int next_addr;
+    unsigned int jmp_addr;
+};
+
+struct bx_inst {
+    unsigned int Rm;
+};
+
+struct blx_inst {
+    union { // signed_immed_24 and Rm occupy the same storage
+        s32 signed_immed_24;
+        u32 Rm;
+    } val;
+    unsigned int inst;
+};
+
+struct clz_inst {
+    unsigned int Rm;
+    unsigned int Rd;
+};
+
+struct cps_inst {
+    unsigned int imod0;
+    unsigned int imod1;
+    unsigned int mmod;
+    unsigned int A, I, F;
+    unsigned int mode;
+};
+
+struct clrex_inst { // intentionally empty: no operand fields
+};
+
+struct cpy_inst {
+    unsigned int Rm;
+    unsigned int Rd;
+};
+
+struct bic_inst { // same members as adc_inst (I, S, Rn, Rd, shifter_operand, shtop_func)
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct sub_inst { // same members as adc_inst
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct tst_inst { // same members as adc_inst, including S and Rd
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct cmn_inst { // like adc_inst but without the S and Rd members
+    unsigned int I;
+    unsigned int Rn;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct teq_inst { // same members as cmn_inst
+    unsigned int I;
+    unsigned int Rn;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct stm_inst { // operand record; the code that fills it is outside this part of the file
+    unsigned int inst;
+};
+
+struct bkpt_inst {
+    u32 imm;
+};
+
+struct stc_inst { // intentionally empty: no operand fields
+};
+
+struct ldc_inst { // intentionally empty: no operand fields
+};
+
+struct swi_inst {
+    unsigned int num;
+};
+
+struct cmp_inst { // same members as cmn_inst (I, Rn, shifter_operand, shtop_func)
+    unsigned int I;
+    unsigned int Rn;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct mov_inst { // like adc_inst but without an Rn member
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct mvn_inst { // same members as mov_inst
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct rev_inst { // operand record; the code that fills it is outside this part of the file
+    unsigned int Rd;
+    unsigned int Rm;
+    unsigned int op1;
+    unsigned int op2;
+};
+
+struct rsb_inst { // same members as adc_inst (I, S, Rn, Rd, shifter_operand, shtop_func)
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct rsc_inst { // same members as adc_inst
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct sbc_inst { // same members as adc_inst
+    unsigned int I;
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int shifter_operand;
+    shtop_fp_t shtop_func;
+};
+
+struct mul_inst {
+    unsigned int S;
+    unsigned int Rd;
+    unsigned int Rs;
+    unsigned int Rm;
+};
+
+struct smul_inst {
+    unsigned int Rd;
+    unsigned int Rs;
+    unsigned int Rm;
+    unsigned int x;
+    unsigned int y;
+};
+
+struct umull_inst { // operand record; the code that fills it is outside this part of the file
+    unsigned int S;
+    unsigned int RdHi;
+    unsigned int RdLo;
+    unsigned int Rs;
+    unsigned int Rm;
+};
+
+struct smlad_inst {
+    unsigned int m;
+    unsigned int Rm;
+    unsigned int Rd;
+    unsigned int Ra;
+    unsigned int Rn;
+    unsigned int op1;
+    unsigned int op2;
+};
+
+struct smla_inst {
+    unsigned int x;
+    unsigned int y;
+    unsigned int Rm;
+    unsigned int Rd;
+    unsigned int Rs;
+    unsigned int Rn;
+};
+
+struct smlalxy_inst {
+    unsigned int x;
+    unsigned int y;
+    unsigned int RdLo;
+    unsigned int RdHi;
+    unsigned int Rm;
+    unsigned int Rn;
+};
+
+struct ssat_inst {
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int imm5;
+    unsigned int sat_imm;
+    unsigned int shift_type;
+};
+
+struct umaal_inst {
+    unsigned int Rn;
+    unsigned int Rm;
+    unsigned int RdHi;
+    unsigned int RdLo;
+};
+
+struct umlal_inst {
+    unsigned int S;
+    unsigned int Rm;
+    unsigned int Rs;
+    unsigned int RdHi;
+    unsigned int RdLo;
+};
+
+struct smlal_inst { // same members, in the same order, as umlal_inst
+    unsigned int S;
+    unsigned int Rm;
+    unsigned int Rs;
+    unsigned int RdHi;
+    unsigned int RdLo;
+};
+
+struct smlald_inst {
+    unsigned int RdLo;
+    unsigned int RdHi;
+    unsigned int Rm;
+    unsigned int Rn;
+    unsigned int swap;
+    unsigned int op1;
+    unsigned int op2;
+};
+
+struct mla_inst {
+    unsigned int S;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int Rs;
+    unsigned int Rm;
+};
+
+struct mrc_inst { // operand record; the code that fills it is outside this part of the file
+    unsigned int opcode_1;
+    unsigned int opcode_2;
+    unsigned int cp_num;
+    unsigned int crn;
+    unsigned int crm;
+    unsigned int Rd;
+    unsigned int inst;
+};
+
+struct mcr_inst { // same members, in the same order, as mrc_inst
+    unsigned int opcode_1;
+    unsigned int opcode_2;
+    unsigned int cp_num;
+    unsigned int crn;
+    unsigned int crm;
+    unsigned int Rd;
+    unsigned int inst;
+};
+
+struct mcrr_inst {
+    unsigned int opcode_1;
+    unsigned int cp_num;
+    unsigned int crm;
+    unsigned int rt;
+    unsigned int rt2;
+};
+
+struct mrs_inst {
+    unsigned int R;
+    unsigned int Rd;
+};
+
+struct msr_inst {
+    unsigned int field_mask;
+    unsigned int R;
+    unsigned int inst;
+};
+
+struct pld_inst { // intentionally empty: no operand fields
+};
+
+struct sxtb_inst {
+    unsigned int Rd;
+    unsigned int Rm;
+    unsigned int rotate;
+};
+
+struct sxtab_inst { // same members as sxtah_inst
+    unsigned int Rd;
+    unsigned int Rn;
+    unsigned int Rm;
+    unsigned int rotate; // spelled "unsigned int" like every other member in this file (was bare "unsigned": same type)
+};
+
+struct sxtah_inst { // operand record; the code that fills it is outside this part of the file
+    unsigned int Rd;
+    unsigned int Rn;
+    unsigned int Rm;
+    unsigned int rotate;
+};
+
+struct sxth_inst { // same members as sxtb_inst (Rd, Rm, rotate)
+    unsigned int Rd;
+    unsigned int Rm;
+    unsigned int rotate;
+};
+
+struct uxtab_inst {
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int rotate;
+    unsigned int Rm;
+};
+
+struct uxtah_inst { // same members, in the same order, as uxtab_inst
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int rotate;
+    unsigned int Rm;
+};
+
+struct uxth_inst { // same members as sxtb_inst
+    unsigned int Rd;
+    unsigned int Rm;
+    unsigned int rotate;
+};
+
+struct cdp_inst {
+    unsigned int opcode_1;
+    unsigned int CRn;
+    unsigned int CRd;
+    unsigned int cp_num;
+    unsigned int opcode_2;
+    unsigned int CRm;
+    unsigned int inst;
+};
+
+struct uxtb_inst { // same members as sxtb_inst
+    unsigned int Rd;
+    unsigned int Rm;
+    unsigned int rotate;
+};
+
+struct swp_inst {
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned int Rm;
+};
+
+struct setend_inst {
+    unsigned int set_bigend;
+};
+
+struct b_2_thumb { // the *_thumb records below do not follow the *_inst naming used above
+    unsigned int imm;
+};
+struct b_cond_thumb {
+    unsigned int imm;
+    unsigned int cond;
+};
+
+struct bl_1_thumb { // single imm member, like b_2_thumb
+    unsigned int imm;
+};
+struct bl_2_thumb { // single imm member, like b_2_thumb
+    unsigned int imm;
+};
+struct blx_1_thumb {
+    unsigned int imm;
+    unsigned int instr;
+};
+
+struct pkh_inst {
+    unsigned int Rm;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned char imm; // the only member of this record narrower than unsigned int
+};
+
+typedef arm_inst * ARM_INST_PTR; // return type of the InterpreterTranslate_* routines
+
+#define CACHE_BUFFER_SIZE    (64 * 1024 * 2000)
+static char inst_buf[CACHE_BUFFER_SIZE]; // backing storage handed out piecewise by AllocBuffer
+static int top = 0; // offset of the first unused byte of inst_buf
+static inline void *AllocBuffer(unsigned int size) { // returns the next size bytes of inst_buf and advances top past them
+    int start = top; // offsets are not rounded, so the result is only as aligned as the running sum of earlier sizes
+    top += size;
+    if (top > CACHE_BUFFER_SIZE) {
+        LOG_ERROR(Core_ARM11, "inst_buf is full");
+        CITRA_IGNORE_EXIT(-1); // NOTE(review): defined empty at the top of this file, so execution continues and the region returned below runs past the end of inst_buf
+    }
+    return (void *)&inst_buf[start];
+}
+
+void InterpreterClearCache() { // rewinds the AllocBuffer offset; inst_buf is not cleared, so later allocations reuse the same bytes
+    top = 0;
+}
+
+static shtop_fp_t get_shtop(unsigned int inst) { // picks the DPO(...) helper that matches the operand encoding in inst
+    if (BIT(inst, 25)) { // bit 25 set: immediate operand
+        return DPO(Immediate);
+    } else if (BITS(inst, 4, 11) == 0) { // all of bits 4-11 clear: plain register
+        return DPO(Register);
+    } else if (BITS(inst, 4, 6) == 0) { // three-bit tests (bits 4-6) select the by-immediate helpers ...
+        return DPO(LogicalShiftLeftByImmediate);
+    } else if (BITS(inst, 4, 7) == 1) { // ... four-bit tests (bits 4-7) select the by-register helpers
+        return DPO(LogicalShiftLeftByRegister);
+    } else if (BITS(inst, 4, 6) == 2) {
+        return DPO(LogicalShiftRightByImmediate);
+    } else if (BITS(inst, 4, 7) == 3) {
+        return DPO(LogicalShiftRightByRegister);
+    } else if (BITS(inst, 4, 6) == 4) {
+        return DPO(ArithmeticShiftRightByImmediate);
+    } else if (BITS(inst, 4, 7) == 5) {
+        return DPO(ArithmeticShiftRightByRegister);
+    } else if (BITS(inst, 4, 6) == 6) {
+        return DPO(RotateRightByImmediate);
+    } else if (BITS(inst, 4, 7) == 7) {
+        return DPO(RotateRightByRegister);
+    }
+    return nullptr; // reached when bit 25 is clear and bits 4 and 7 are both set
+}
+
+static get_addr_fp_t get_calc_addr_op(unsigned int inst) { // picks the address helper that matches the encoding in inst; tests are tried in order, first match wins
+    if (BITS(inst, 24, 27) == 5 && BIT(inst, 21) == 0) {
+        return LnSWoUB(ImmediateOffset);
+    } else if (BITS(inst, 24, 27) == 7 && BIT(inst, 21) == 0 && BITS(inst, 4, 11) == 0) {
+        return LnSWoUB(RegisterOffset);
+    } else if (BITS(inst, 24, 27) == 7 && BIT(inst, 21) == 0 && BIT(inst, 4) == 0) { // the unscaled test above has already claimed bits 4-11 == 0
+        return LnSWoUB(ScaledRegisterOffset);
+    } else if (BITS(inst, 24, 27) == 5 && BIT(inst, 21) == 1) {
+        return LnSWoUB(ImmediatePreIndexed);
+    } else if (BITS(inst, 24, 27) == 7 && BIT(inst, 21) == 1 && BITS(inst, 4, 11) == 0) {
+        return LnSWoUB(RegisterPreIndexed);
+    } else if (BITS(inst, 24, 27) == 7 && BIT(inst, 21) == 1 && BIT(inst, 4) == 0) {
+        return LnSWoUB(ScaledRegisterPreIndexed);
+    } else if (BITS(inst, 24, 27) == 4 && BIT(inst, 21) == 0) {
+        return LnSWoUB(ImmediatePostIndexed);
+    } else if (BITS(inst, 24, 27) == 6 && BIT(inst, 21) == 0 && BITS(inst, 4, 11) == 0) {
+        return LnSWoUB(RegisterPostIndexed);
+    } else if (BITS(inst, 24, 27) == 6 && BIT(inst, 21) == 0 && BIT(inst, 4) == 0) {
+        return LnSWoUB(ScaledRegisterPostIndexed);
+    } else if (BITS(inst, 24, 27) == 1 && BITS(inst, 21, 22) == 2 && BIT(inst, 7) == 1 && BIT(inst, 4) == 1) { // the MLnS forms all require bits 7 and 4 set
+        return MLnS(ImmediateOffset);
+    } else if (BITS(inst, 24, 27) == 1 && BITS(inst, 21, 22) == 0 && BIT(inst, 7) == 1 && BIT(inst, 4) == 1) {
+        return MLnS(RegisterOffset);
+    } else if (BITS(inst, 24, 27) == 1 && BITS(inst, 21, 22) == 3 && BIT(inst, 7) == 1 && BIT(inst, 4) == 1) {
+        return MLnS(ImmediatePreIndexed);
+    } else if (BITS(inst, 24, 27) == 1 && BITS(inst, 21, 22) == 1 && BIT(inst, 7) == 1 && BIT(inst, 4) == 1) {
+        return MLnS(RegisterPreIndexed);
+    } else if (BITS(inst, 24, 27) == 0 && BITS(inst, 21, 22) == 2 && BIT(inst, 7) == 1 && BIT(inst, 4) == 1) {
+        return MLnS(ImmediatePostIndexed);
+    } else if (BITS(inst, 24, 27) == 0 && BITS(inst, 21, 22) == 0 && BIT(inst, 7) == 1 && BIT(inst, 4) == 1) {
+        return MLnS(RegisterPostIndexed);
+    } else if (BITS(inst, 23, 27) == 0x11) { // the LdnStM forms are told apart by the five bits 23-27
+        return LdnStM(IncrementAfter);
+    } else if (BITS(inst, 23, 27) == 0x13) {
+        return LdnStM(IncrementBefore);
+    } else if (BITS(inst, 23, 27) == 0x10) {
+        return LdnStM(DecrementAfter);
+    } else if (BITS(inst, 23, 27) == 0x12) {
+        return LdnStM(DecrementBefore);
+    }
+    return nullptr; // no pattern matched
+}
+
+#define INTERPRETER_TRANSLATE(s) glue(InterpreterTranslate_, s) // names the translator for mnemonic s; glue() is defined elsewhere, presumably token pasting (TODO confirm)
+
+// ADC translator: allocates an arm_inst with an adc_inst payload in its
+// component area and fills both from the instruction word; `index` becomes idx.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(adc)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(adc_inst));
+    adc_inst* const payload = (adc_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// ADD translator: allocates an arm_inst with an add_inst payload in its
+// component area and fills both from the instruction word; `index` becomes idx.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(add)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(add_inst));
+    add_inst* const payload = (add_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// AND translator: allocates an arm_inst with an and_inst payload in its
+// component area and fills both from the instruction word; `index` becomes idx.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(and)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(and_inst));
+    and_inst* const payload = (and_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// B/BL translator: records the link bit and the sign-extended, word-scaled
+// 24-bit branch displacement, and classifies the branch kind in `br`.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(bbl)(unsigned int inst, int index) {
+    #define POSBRANCH ((inst & 0x7fffff) << 2)
+    #define NEGBRANCH ((0xff000000 |(inst & 0xffffff)) << 2)
+
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(bbl_inst));
+    bbl_inst* const payload = (bbl_inst*)entry->component;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Bit 24 (stored below as L) selects CALL over DIRECT_BRANCH.
+    entry->br   = BIT(inst, 24) ? CALL : DIRECT_BRANCH;
+    if (BITS(inst, 28, 31) <= 0xe) {
+        entry->br |= COND;
+    }
+
+    payload->L = BIT(inst, 24);
+    // Bit 23 is the displacement's sign bit; pick the matching expansion.
+    payload->signed_immed_24 = BIT(inst, 23) ? NEGBRANCH : POSBRANCH;
+    return entry;
+}
+// BIC translator: allocates an arm_inst with a bic_inst payload in its
+// component area and fills both from the instruction word; `index` becomes idx.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(bic)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(bic_inst));
+    bic_inst* const payload = (bic_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+    return entry;
+}
+
+// BKPT translator: the immediate is rebuilt from bits 8-19 (shifted left by 4)
+// combined with bits 0-3 of the instruction word.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(bkpt)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(bkpt_inst));
+    bkpt_inst* const payload = (bkpt_inst*)entry->component;
+
+    payload->imm = (BITS(inst, 8, 19) << 4) | BITS(inst, 0, 3);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+
+// BLX translator. The register form (bits 20-27 == 0x12, bits 4-7 == 0x3)
+// stores Rm; every other encoding stores the raw low 24 bits as the immediate.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(blx)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(blx_inst));
+    blx_inst* const payload = (blx_inst*)entry->component;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = INDIRECT_BRANCH;
+    const bool register_form = BITS(inst, 20, 27) == 0x12 && BITS(inst, 4, 7) == 0x3;
+    payload->inst = inst;
+    if (!register_form) {
+        payload->val.signed_immed_24 = BITS(inst, 0, 23);
+    } else {
+        payload->val.Rm = BITS(inst, 0, 3);
+    }
+    return entry;
+}
+// BX translator: always tagged INDIRECT_BRANCH; the only payload field is the
+// register number Rm taken from bits 0-3.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(bx)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(bx_inst));
+    bx_inst* const payload = (bx_inst*)entry->component;
+
+    payload->Rm = BITS(inst, 0, 3);
+
+    entry->br   = INDIRECT_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// BXJ is translated by forwarding both arguments to the BX translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(bxj)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(bx)(inst, index);
+}
+
+// CDP translator: captures the coprocessor number, both opcode fields, the
+// three coprocessor register numbers and the raw word, then logs a trace line.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(cdp)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(cdp_inst));
+    cdp_inst* const payload = (cdp_inst*)entry->component;
+
+    payload->inst     = inst;
+    payload->cp_num   = BITS(inst, 8, 11);
+    payload->opcode_1 = BITS(inst, 20, 23);
+    payload->opcode_2 = BITS(inst, 5, 7);
+    payload->CRn      = BITS(inst, 16, 19);
+    payload->CRd      = BITS(inst, 12, 15);
+    payload->CRm      = BITS(inst,  0,  3);
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    LOG_TRACE(Core_ARM11, "inst %x index %x", inst, index);
+    return entry;
+}
+// CLREX translator: no payload fields are decoded; only the header is filled.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(clrex)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(clrex_inst));
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// CLZ translator: stores Rd (bits 12-15) and Rm (bits 0-3) in a clz_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(clz)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(clz_inst));
+    clz_inst* const payload = (clz_inst*)entry->component;
+
+    payload->Rd = BITS(inst, 12, 15);
+    payload->Rm = BITS(inst,  0,  3);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+// CMN translator: stores I, Rn, the 12-bit shifter operand and the routine
+// returned by get_shtop() in a cmn_inst payload; never tagged as a branch.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(cmn)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(cmn_inst));
+    cmn_inst* const payload = (cmn_inst*)entry->component;
+
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// CMP translator: stores I, Rn, the 12-bit shifter operand and the routine
+// returned by get_shtop() in a cmp_inst payload; never tagged as a branch.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(cmp)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(cmp_inst));
+    cmp_inst* const payload = (cmp_inst*)entry->component;
+
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// CPS translator: copies the imod/mmod bits, the A/I/F bits and the 5-bit mode
+// field (bits 0-4) into a cps_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(cps)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(cps_inst));
+    cps_inst* const payload = (cps_inst*)entry->component;
+
+    payload->mode  = BITS(inst, 0, 4);
+    payload->F     = BIT(inst, 6);
+    payload->I     = BIT(inst, 7);
+    payload->A     = BIT(inst, 8);
+    payload->mmod  = BIT(inst, 17);
+    payload->imod0 = BIT(inst, 18);
+    payload->imod1 = BIT(inst, 19);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// CPY translator: uses the mov_inst payload layout (I, S, Rd, shifter operand
+// and get_shtop() routine); no Rn field is decoded.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(cpy)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mov_inst));
+    mov_inst* const payload = (mov_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// EOR translator: allocates an arm_inst with an eor_inst payload in its
+// component area and fills both from the instruction word; `index` becomes idx.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(eor)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(eor_inst));
+    eor_inst* const payload = (eor_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// LDC translator: no payload fields are decoded; only the header is filled.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldc)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldc_inst));
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// LDM translator: keeps the raw word and the addressing routine selected by
+// get_calc_addr_op() in an ldst_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldm)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Bit 15 of the word set means the instruction is tagged as an indirect
+    // branch; otherwise it stays NON_BRANCH.
+    entry->br   = BIT(inst, 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// SXTH translator: uses the sxtb_inst payload layout for Rd (bits 12-15),
+// Rm (bits 0-3) and the 2-bit rotate field (bits 10-11).
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sxth)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(sxtb_inst));
+    sxtb_inst* const payload = (sxtb_inst*)entry->component;
+
+    payload->Rm     = BITS(inst,  0,  3);
+    payload->rotate = BITS(inst, 10, 11);
+    payload->Rd     = BITS(inst, 12, 15);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// LDR translator: keeps the raw word and the addressing routine selected by
+// get_calc_addr_op() in an ldst_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldr)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Register field 12-15 equal to 15 tags the instruction as an indirect
+    // branch; otherwise it stays NON_BRANCH.
+    entry->br   = (BITS(inst, 12, 15) == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+
+// LDRCOND translator: performs the same decoding steps as the LDR translator
+// (raw word + get_calc_addr_op() routine in an ldst_inst payload).
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrcond)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Register field 12-15 equal to 15 tags the instruction as an indirect
+    // branch; otherwise it stays NON_BRANCH.
+    entry->br   = (BITS(inst, 12, 15) == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+
+// UXTH translator: stores Rd (bits 12-15), Rm (bits 0-3) and the 2-bit rotate
+// field (bits 10-11) in a uxth_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uxth)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(uxth_inst));
+    uxth_inst* const payload = (uxth_inst*)entry->component;
+
+    payload->Rm     = BITS(inst,  0,  3);
+    payload->rotate = BITS(inst, 10, 11);
+    payload->Rd     = BITS(inst, 12, 15);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// UXTAH translator: stores Rn (bits 16-19), Rd (bits 12-15), the rotate field
+// (bits 10-11) and Rm (bits 0-3) in a uxtah_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uxtah)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(uxtah_inst));
+    uxtah_inst* const payload = (uxtah_inst*)entry->component;
+
+    payload->Rm     = BITS(inst,  0,  3);
+    payload->rotate = BITS(inst, 10, 11);
+    payload->Rd     = BITS(inst, 12, 15);
+    payload->Rn     = BITS(inst, 16, 19);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// LDRB translator: stores the raw word and its get_calc_addr_op() addressing routine.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrb)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+// LDRBT translator: only the two post-indexed addressing routines are selected;
+// any other value of bits 25-27 runs DEBUG_MSG and does not assign get_addr.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrbt)(unsigned int inst, int index) {
+    arm_inst* inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+
+    inst_base->br   = NON_BRANCH;
+    inst_base->idx  = index;
+    inst_base->cond = BITS(inst, 28, 31);
+
+    inst_cream->inst = inst;
+    // Bits 25-27 pick the addressing form.
+    switch (BITS(inst, 25, 27)) {
+    case 2:  inst_cream->get_addr = LnSWoUB(ImmediatePostIndexed);      break;
+    case 3:  inst_cream->get_addr = LnSWoUB(ScaledRegisterPostIndexed); break;
+    default: DEBUG_MSG; break;
+    }
+
+    return inst_base;
+}
+// LDRD translator: stores the raw word and its get_calc_addr_op() addressing routine.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrd)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+// LDREX translator: records Rn and Rd; a destination of R15 is tagged as a branch.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrex)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const payload = (generic_arm_inst*)entry->component;
+
+    payload->Rd = BITS(inst, 12, 15);
+    payload->Rn = BITS(inst, 16, 19);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    const bool dest_is_r15 = (BITS(inst, 12, 15) == 15);
+    entry->br   = dest_is_r15 ? INDIRECT_BRANCH : NON_BRANCH;
+    return entry;
+}
+// LDREXB is translated by forwarding both arguments to the LDREX translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrexb)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(ldrex)(inst, index);
+}
+// LDREXH is translated by forwarding both arguments to the LDREX translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrexh)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(ldrex)(inst, index);
+}
+// LDREXD is translated by forwarding both arguments to the LDREX translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrexd)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(ldrex)(inst, index);
+}
+// LDRH translator: stores the raw word and its get_calc_addr_op() addressing routine.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrh)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+// LDRSB translator: stores the raw word and its get_calc_addr_op() addressing routine.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrsb)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+// LDRSH translator: stores the raw word and its get_calc_addr_op() addressing routine.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrsh)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+// LDRT translator: picks a post-indexed addressing routine from bits 25-27.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ldrt)(unsigned int inst, int index) {
+    arm_inst* inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+
+    inst_base->br   = NON_BRANCH;
+    inst_base->idx  = index;
+    inst_base->cond = BITS(inst, 28, 31);
+    inst_cream->inst = inst;
+    switch (BITS(inst, 25, 27)) {
+    case 2:
+        inst_cream->get_addr = LnSWoUB(ImmediatePostIndexed);
+        break;
+    case 3:
+        inst_cream->get_addr = LnSWoUB(ScaledRegisterPostIndexed);
+        break;
+    default:
+        // This path would correspond to the Thumb variant of the instruction.
+        // That variant was added in ARMv6T2, whereas the 3DS CPU is only
+        // ARMv6K and does not support it, so (per the original citra note)
+        // leaving it unimplemented is sufficient.
+        DEBUG_MSG;
+        break;
+    }
+    if (BITS(inst, 12, 15) == 15)
+        inst_base->br = INDIRECT_BRANCH;
+    return inst_base;
+}
+// MCR translator: stores cp_num, both opcodes, crn/crm, Rd and the raw word.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mcr)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mcr_inst));
+    mcr_inst* const payload = (mcr_inst*)entry->component;
+
+    payload->inst     = inst;
+    payload->cp_num   = BITS(inst,  8, 11);
+    payload->Rd       = BITS(inst, 12, 15);
+    payload->opcode_1 = BITS(inst, 21, 23);
+    payload->opcode_2 = BITS(inst,  5,  7);
+    payload->crn      = BITS(inst, 16, 19);
+    payload->crm      = BITS(inst,  0,  3);
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+
+// MCRR translator: stores crm (bits 0-3), opcode_1 (bits 4-7), cp_num
+// (bits 8-11), rt (bits 12-15) and rt2 (bits 16-19) in an mcrr_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mcrr)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mcrr_inst));
+    mcrr_inst* const payload = (mcrr_inst*)entry->component;
+
+    payload->rt2      = BITS(inst, 16, 19);
+    payload->rt       = BITS(inst, 12, 15);
+    payload->cp_num   = BITS(inst, 8, 11);
+    payload->opcode_1 = BITS(inst, 4, 7);
+    payload->crm      = BITS(inst, 0, 3);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+
+// MLA translator: note that for this encoding Rd comes from bits 16-19 and Rn
+// from bits 12-15; Rs is bits 8-11, Rm bits 0-3 and S is bit 20.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mla)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mla_inst));
+    mla_inst* const payload = (mla_inst*)entry->component;
+
+    payload->Rm = BITS(inst,  0,  3);
+    payload->Rs = BITS(inst,  8, 11);
+    payload->Rn = BITS(inst, 12, 15);
+    payload->Rd = BITS(inst, 16, 19);
+    payload->S  = BIT(inst, 20);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// MOV translator: decodes I, S, Rd, the 12-bit shifter operand and the
+// get_shtop() routine into a mov_inst payload; no Rn field is decoded.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mov)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mov_inst));
+    mov_inst* const payload = (mov_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// MRC translator: stores cp_num, both opcodes, crn/crm, Rd and the raw word.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mrc)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mrc_inst));
+    mrc_inst* const payload = (mrc_inst*)entry->component;
+
+    payload->inst     = inst;
+    payload->cp_num   = BITS(inst,  8, 11);
+    payload->Rd       = BITS(inst, 12, 15);
+    payload->opcode_1 = BITS(inst, 21, 23);
+    payload->opcode_2 = BITS(inst,  5,  7);
+    payload->crn      = BITS(inst, 16, 19);
+    payload->crm      = BITS(inst,  0,  3);
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+
+// MRRC is translated by forwarding both arguments to the MCRR translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mrrc)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(mcrr)(inst, index);
+}
+
+// MRS translator: stores Rd (bits 12-15) and the R flag (bit 22).
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mrs)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mrs_inst));
+    mrs_inst* const payload = (mrs_inst*)entry->component;
+
+    payload->R  = BIT(inst, 22);
+    payload->Rd = BITS(inst, 12, 15);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+// MSR translator: stores the field mask (bits 16-19), the R flag (bit 22) and
+// the raw instruction word in an msr_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(msr)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(msr_inst));
+    msr_inst* const payload = (msr_inst*)entry->component;
+
+    payload->inst       = inst;
+    payload->R          = BIT(inst, 22);
+    payload->field_mask = BITS(inst, 16, 19);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// MUL translator: stores S (bit 20), Rd (bits 16-19), Rs (bits 8-11) and
+// Rm (bits 0-3) in a mul_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mul)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mul_inst));
+    mul_inst* const payload = (mul_inst*)entry->component;
+
+    payload->Rd = BITS(inst, 16, 19);
+    payload->Rs = BITS(inst, 8, 11);
+    payload->Rm = BITS(inst, 0, 3);
+    payload->S  = BIT(inst, 20);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// MVN translator: decodes I, S, Rd, the 12-bit shifter operand and the
+// get_shtop() routine into an mvn_inst payload; no Rn field is decoded.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(mvn)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mvn_inst));
+    mvn_inst* const payload = (mvn_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    // Common header fields.
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// ORR translator: allocates an arm_inst with an orr_inst payload in its
+// component area and fills both from the instruction word; `index` becomes idx.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(orr)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(orr_inst));
+    orr_inst* const payload = (orr_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+
+// NOP introduced in ARMv6K.
+// Translator for NOP: only a bare arm_inst header is allocated and filled.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(nop)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst));
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+
+// PKHBT translator: stores Rd (bits 12-15), Rn (bits 16-19), Rm (bits 0-3)
+// and the 5-bit immediate from bits 7-11 in a pkh_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(pkhbt)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(pkh_inst));
+    pkh_inst* const payload = (pkh_inst*)entry->component;
+
+    payload->Rm  = BITS(inst, 0, 3);
+    payload->imm = BITS(inst, 7, 11);
+    payload->Rd  = BITS(inst, 12, 15);
+    payload->Rn  = BITS(inst, 16, 19);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+
+// PKHTB is translated by forwarding both arguments to the PKHBT translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(pkhtb)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(pkhbt)(inst, index);
+}
+
+// PLD translator: no payload fields are decoded; only the common header is
+// filled (the allocation still reserves room for a pld_inst).
+static ARM_INST_PTR INTERPRETER_TRANSLATE(pld)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(pld_inst));
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+
+// QADD translator (also the target of the QDADD/QDSUB/QSUB forwarders): stores
+// op1 (bits 21-22), Rm, Rn and Rd in a generic_arm_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qadd)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const payload = (generic_arm_inst*)entry->component;
+
+    payload->Rd  = BITS(inst, 12, 15);
+    payload->Rn  = BITS(inst, 16, 19);
+    payload->Rm  = BITS(inst, 0, 3);
+    payload->op1 = BITS(inst, 21, 22);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// QDADD is translated by forwarding both arguments to the QADD translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qdadd)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(qadd)(inst, index);
+}
+// QDSUB is translated by forwarding both arguments to the QADD translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qdsub)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(qadd)(inst, index);
+}
+// QSUB is translated by forwarding both arguments to the QADD translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qsub)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(qadd)(inst, index);
+}
+
+// QADD8 translator: stores Rm, Rn, Rd plus op1 (bits 20-21) and op2
+// (bits 5-7) in a generic_arm_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qadd8)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const payload = (generic_arm_inst*)entry->component;
+
+    payload->op2 = BITS(inst, 5, 7);
+    payload->op1 = BITS(inst, 20, 21);
+    payload->Rd  = BITS(inst, 12, 15);
+    payload->Rn  = BITS(inst, 16, 19);
+    payload->Rm  = BITS(inst, 0, 3);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// QADD16 is translated by forwarding both arguments to the QADD8 translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(qadd8)(inst, index);
+}
+// QADDSUBX is translated by forwarding both arguments to the QADD8 translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qaddsubx)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(qadd8)(inst, index);
+}
+// QSUB8 is translated by forwarding both arguments to the QADD8 translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qsub8)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(qadd8)(inst, index);
+}
+// QSUB16 is translated by forwarding both arguments to the QADD8 translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qsub16)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(qadd8)(inst, index);
+}
+// QSUBADDX is translated by forwarding both arguments to the QADD8 translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(qsubaddx)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(qadd8)(inst, index);
+}
+
+// REV translator (also the target of the REV16/REVSH forwarders): stores Rm,
+// Rd, op1 (bits 20-22) and op2 (bits 5-7) in a rev_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(rev)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(rev_inst));
+    rev_inst* const payload = (rev_inst*)entry->component;
+
+    payload->op2 = BITS(inst, 5, 7);
+    payload->op1 = BITS(inst, 20, 22);
+    payload->Rd  = BITS(inst, 12, 15);
+    payload->Rm  = BITS(inst,  0,  3);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// REV16 is translated by forwarding both arguments to the REV translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(rev16)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(rev)(inst, index);
+}
+// REVSH is translated by forwarding both arguments to the REV translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(revsh)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(rev)(inst, index);
+}
+
+// RFE translator: cond is forced to AL and the entry is always INDIRECT_BRANCH.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(rfe)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const payload = (ldst_inst*)entry->component;
+
+    payload->get_addr = get_calc_addr_op(inst);
+    payload->inst     = inst;
+
+    entry->br   = INDIRECT_BRANCH;
+    entry->idx  = index;
+    entry->cond = AL;
+
+    return entry;
+}
+
+// RSB translator: allocates an arm_inst with an rsb_inst payload in its
+// component area and fills both from the instruction word; `index` becomes idx.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(rsb)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(rsb_inst));
+    rsb_inst* const payload = (rsb_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// RSC translator: allocates an arm_inst with an rsc_inst payload in its
+// component area and fills both from the instruction word; `index` becomes idx.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(rsc)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(rsc_inst));
+    rsc_inst* const payload = (rsc_inst*)entry->component;
+
+    // Payload fields.
+    payload->shifter_operand = BITS(inst, 0, 11);
+    payload->Rd              = BITS(inst, 12, 15);
+    payload->Rn              = BITS(inst, 16, 19);
+    payload->S               = BIT(inst, 20);
+    payload->I               = BIT(inst, 25);
+    payload->shtop_func      = get_shtop(inst);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Rd == 15 marks the instruction as an indirect branch; otherwise NON_BRANCH.
+    entry->br   = (payload->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+// SADD8 translator: stores Rm, Rn, Rd plus op1 (bits 20-21) and op2
+// (bits 5-7) in a generic_arm_inst payload.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sadd8)(unsigned int inst, int index) {
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const payload = (generic_arm_inst*)entry->component;
+
+    payload->op2 = BITS(inst, 5, 7);
+    payload->op1 = BITS(inst, 20, 21);
+    payload->Rd  = BITS(inst, 12, 15);
+    payload->Rn  = BITS(inst, 16, 19);
+    payload->Rm  = BITS(inst, 0, 3);
+
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    return entry;
+}
+// SADD16 is translated by forwarding both arguments to the SADD8 translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sadd16)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
+// SADDSUBX is translated by forwarding both arguments to the SADD8 translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(saddsubx)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
+// SSUB8 is translated by forwarding both arguments to the SADD8 translator.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ssub8)(unsigned int inst, int index) {
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ssub16)(unsigned int inst, int index)
+{   // Decoding is delegated to the sadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ssubaddx)(unsigned int inst, int index)
+{   // Decoding is delegated to the sadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sbc)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sbc_inst));
+    sbc_inst *fields = (sbc_inst *)entry->component;
+    // Translate SBC: fill the sbc_inst payload (flags, registers, shifter operand
+    // and its evaluation callback) and then the common arm_inst header.
+    fields->shtop_func      = get_shtop(inst);
+    fields->shifter_operand = BITS(inst, 0, 11);
+    fields->Rd              = BITS(inst, 12, 15);
+    fields->Rn              = BITS(inst, 16, 19);
+    fields->S               = BIT(inst, 20);
+    fields->I               = BIT(inst, 25);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    // Rd == 15 flags the instruction as an indirect branch; otherwise non-branch.
+    entry->br   = (fields->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sel)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const fields = (generic_arm_inst*)entry->component;
+    // SEL payload: op1 is the 3-bit slice 20..22, op2 is bits 5..7, plus Rd/Rn/Rm.
+    fields->op1 = BITS(inst, 20, 22);
+    fields->op2 = BITS(inst, 5, 7);
+    fields->Rd  = BITS(inst, 12, 15);
+    fields->Rn  = BITS(inst, 16, 19);
+    fields->Rm  = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(setend)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(setend_inst));
+    setend_inst* const fields = (setend_inst*)entry->component;
+    // Bit 9 of the opcode is stored as set_bigend; the condition is fixed to AL.
+    fields->set_bigend = BIT(inst, 9);
+
+    entry->idx  = index;
+    entry->cond = AL;
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sev)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst));
+    // No payload is allocated for SEV; only the header fields are filled in.
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(shadd8)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const fields = (generic_arm_inst*)entry->component;
+    // Generic payload: three register numbers plus the op1/op2 selector slices.
+    fields->Rd  = BITS(inst, 12, 15);
+    fields->Rn  = BITS(inst, 16, 19);
+    fields->Rm  = BITS(inst, 0, 3);
+    fields->op2 = BITS(inst, 5, 7);
+    fields->op1 = BITS(inst, 20, 21);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(shadd16)(unsigned int inst, int index)
+{   // Decoding is delegated to the shadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(shaddsubx)(unsigned int inst, int index)
+{   // Decoding is delegated to the shadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(shsub8)(unsigned int inst, int index)
+{   // Decoding is delegated to the shadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(shsub16)(unsigned int inst, int index)
+{   // Decoding is delegated to the shadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(shsubaddx)(unsigned int inst, int index)
+{   // Decoding is delegated to the shadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smla)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smla_inst));
+    smla_inst *fields = (smla_inst *)entry->component;
+    // SMLA payload: x/y come from bits 5 and 6; Rd sits in 16..19 and Rn in 12..15.
+    fields->Rd = BITS(inst, 16, 19);
+    fields->Rn = BITS(inst, 12, 15);
+    fields->Rs = BITS(inst, 8, 11);
+    fields->Rm = BITS(inst, 0, 3);
+    fields->y  = BIT(inst, 6);
+    fields->x  = BIT(inst, 5);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smlad)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst));
+    smlad_inst* const fields = (smlad_inst*)entry->component;
+    // Also used by the smuad/smusd/smlsd translators; op1/op2 keep the raw selectors.
+    fields->op1 = BITS(inst, 20, 22);
+    fields->op2 = BITS(inst, 5, 7);
+    fields->Rd  = BITS(inst, 16, 19);
+    fields->Ra  = BITS(inst, 12, 15);
+    fields->Rm  = BITS(inst, 8, 11);
+    fields->Rn  = BITS(inst, 0, 3);
+    fields->m   = BIT(inst, 5);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smuad)(unsigned int inst, int index)
+{   // Decoding is delegated to the smlad translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(smlad)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smusd)(unsigned int inst, int index)
+{   // Decoding is delegated to the smlad translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(smlad)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smlsd)(unsigned int inst, int index)
+{   // Decoding is delegated to the smlad translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(smlad)(inst, index);
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smlal)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umlal_inst));
+    umlal_inst *fields = (umlal_inst *)entry->component;
+    // SMLAL stores its operands in a umlal_inst payload (S flag, Rm, Rs, RdHi, RdLo).
+    fields->RdHi = BITS(inst, 16, 19);
+    fields->RdLo = BITS(inst, 12, 15);
+    fields->Rs   = BITS(inst, 8, 11);
+    fields->Rm   = BITS(inst, 0, 3);
+    fields->S    = BIT(inst, 20);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smlalxy)(unsigned int inst, int index)
+{
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlalxy_inst));
+    smlalxy_inst* const inst_cream = (smlalxy_inst*)inst_base->component;
+    // Translate SMLAL<x><y>: header first, then the smlalxy_inst payload.
+    inst_base->cond = BITS(inst, 28, 31);
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+    // Payload: the x and y bits (5 and 6) plus the four 4-bit register numbers.
+    inst_cream->x    = BIT(inst, 5);
+    inst_cream->y    = BIT(inst, 6);
+    inst_cream->RdLo = BITS(inst, 12, 15);
+    inst_cream->RdHi = BITS(inst, 16, 19);
+    inst_cream->Rn   = BITS(inst, 0, 3); // was 0..4; bit 4 is not part of the register field
+    inst_cream->Rm   = BITS(inst, 8, 11);
+
+    return inst_base;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smlaw)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst));
+    smlad_inst* const fields = (smlad_inst*)entry->component;
+    // SMLAW reuses the smlad_inst payload; here m is taken from bit 6 of the opcode.
+    fields->m  = BIT(inst, 6);
+    fields->Rd = BITS(inst, 16, 19);
+    fields->Ra = BITS(inst, 12, 15);
+    fields->Rm = BITS(inst, 8, 11);
+    fields->Rn = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smlald)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlald_inst));
+    smlald_inst* const fields = (smlald_inst*)entry->component;
+    // Also used by the smlsld translator; swap comes from bit 5 of the opcode.
+    fields->op1  = BITS(inst, 20, 22);
+    fields->op2  = BITS(inst, 5, 7);
+    fields->swap = BIT(inst, 5);
+    fields->RdHi = BITS(inst, 16, 19);
+    fields->RdLo = BITS(inst, 12, 15);
+    fields->Rm   = BITS(inst, 8, 11);
+    fields->Rn   = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smlsld)(unsigned int inst, int index)
+{   // Decoding is delegated to the smlald translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(smlald)(inst, index);
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smmla)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst));
+    smlad_inst* const fields = (smlad_inst*)entry->component;
+    // Also used by the smmls/smmul translators; the payload type is smlad_inst.
+    fields->op1 = BITS(inst, 20, 22);
+    fields->op2 = BITS(inst, 5, 7);
+    fields->Rd  = BITS(inst, 16, 19);
+    fields->Ra  = BITS(inst, 12, 15);
+    fields->Rm  = BITS(inst, 8, 11);
+    fields->Rn  = BITS(inst, 0, 3);
+    fields->m   = BIT(inst, 5);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smmls)(unsigned int inst, int index)
+{   // Decoding is delegated to the smmla translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(smmla)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smmul)(unsigned int inst, int index)
+{   // Decoding is delegated to the smmla translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(smmla)(inst, index);
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smul)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smul_inst));
+    smul_inst *fields = (smul_inst *)entry->component;
+
+    // Selector bits: x is opcode bit 5, y is opcode bit 6.
+    fields->x  = BIT(inst, 5);
+    fields->y  = BIT(inst, 6);
+    // Register numbers.
+    fields->Rm = BITS(inst, 0, 3);
+    fields->Rs = BITS(inst, 8, 11);
+    fields->Rd = BITS(inst, 16, 19);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smull)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umull_inst));
+    umull_inst *fields = (umull_inst *)entry->component;
+    // SMULL stores its operands in a umull_inst payload (S flag, Rm, Rs, RdHi, RdLo).
+    fields->RdHi = BITS(inst, 16, 19);
+    fields->RdLo = BITS(inst, 12, 15);
+    fields->Rs   = BITS(inst, 8, 11);
+    fields->Rm   = BITS(inst, 0, 3);
+    fields->S    = BIT(inst, 20);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(smulw)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst));
+    smlad_inst *fields = (smlad_inst *)entry->component;
+    // SMULW uses the smlad_inst payload; only m, Rd, Rm and Rn are written here.
+    fields->Rd = BITS(inst, 16, 19);
+    fields->Rm = BITS(inst, 8, 11);
+    fields->Rn = BITS(inst, 0, 3);
+    fields->m  = BIT(inst, 6);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(srs)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* const fields = (ldst_inst*)entry->component;
+    // Keeps the raw opcode and the address callback; the condition is fixed to AL.
+    fields->get_addr = get_calc_addr_op(inst);
+    fields->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = AL;
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ssat)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ssat_inst));
+    ssat_inst* const fields = (ssat_inst*)entry->component;
+    // Also used by the usat translator. sat_imm is bits 16..20, imm5 is bits 7..11.
+    fields->sat_imm    = BITS(inst, 16, 20);
+    fields->Rd         = BITS(inst, 12, 15);
+    fields->imm5       = BITS(inst, 7, 11);
+    fields->shift_type = BIT(inst, 6);
+    fields->Rn         = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(ssat16)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ssat_inst));
+    ssat_inst* const fields = (ssat_inst*)entry->component;
+    // Also used by the usat16 translator; sat_imm is the 4-bit slice 16..19 here.
+    fields->sat_imm = BITS(inst, 16, 19);
+    fields->Rd      = BITS(inst, 12, 15);
+    fields->Rn      = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(stc)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(stc_inst));
+    entry->br   = NON_BRANCH;
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    // Space for an stc_inst payload is reserved but nothing is written to it here.
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(stm)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst *fields = (ldst_inst *)entry->component;
+    // The payload keeps the raw opcode plus the address callback chosen for it.
+    fields->get_addr = get_calc_addr_op(inst);
+    fields->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sxtb)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sxtb_inst));
+    sxtb_inst *fields = (sxtb_inst *)entry->component;
+    // SXTB payload: destination, source register and the 2-bit rotate field.
+    fields->rotate = BITS(inst, 10, 11);
+    fields->Rm     = BITS(inst, 0, 3);
+    fields->Rd     = BITS(inst, 12, 15);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(str)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst *fields = (ldst_inst *)entry->component;
+    // The payload keeps the raw opcode plus the address callback chosen for it.
+    fields->get_addr = get_calc_addr_op(inst);
+    fields->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uxtb)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(uxth_inst));
+    uxth_inst *fields = (uxth_inst *)entry->component;
+    // NOTE(review): payload is typed uxth_inst here; confirm the UXTB executor agrees.
+    fields->Rm     = BITS(inst, 0, 3);
+    fields->rotate = BITS(inst, 10, 11);
+    fields->Rd     = BITS(inst, 12, 15);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uxtab)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(uxtab_inst));
+    uxtab_inst *fields = (uxtab_inst *)entry->component;
+    // UXTAB payload: Rd, Rn, Rm and the 2-bit rotate field.
+    fields->Rn     = BITS(inst, 16, 19);
+    fields->Rd     = BITS(inst, 12, 15);
+    fields->rotate = BITS(inst, 10, 11);
+    fields->Rm     = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strb)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst *fields = (ldst_inst *)entry->component;
+    // The payload keeps the raw opcode plus the address callback chosen for it.
+    fields->get_addr = get_calc_addr_op(inst);
+    fields->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strbt)(unsigned int inst, int index)
+{
+    arm_inst* inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx   = index;
+    inst_base->br    = NON_BRANCH;
+    inst_cream->inst = inst;
+
+    // Bits 25..27 pick the post-indexed address callback: 2 -> immediate,
+    // 3 -> scaled register. Any other value only runs DEBUG_MSG, and get_addr
+    // is not assigned on that path.
+    switch (BITS(inst, 25, 27)) {
+    case 2:  inst_cream->get_addr = LnSWoUB(ImmediatePostIndexed);      break;
+    case 3:  inst_cream->get_addr = LnSWoUB(ScaledRegisterPostIndexed); break;
+    default: { DEBUG_MSG; } break;
+    }
+
+    return inst_base;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strd)(unsigned int inst, int index){
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst *fields = (ldst_inst *)entry->component;
+    // The payload keeps the raw opcode plus the address callback chosen for it.
+    fields->get_addr = get_calc_addr_op(inst);
+    fields->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strex)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst *fields = (generic_arm_inst *)entry->component;
+    // Also used by the strexb/strexh/strexd translators; only Rn, Rd, Rm are written.
+    fields->Rm  = BITS(inst, 0, 3);
+    fields->Rd  = BITS(inst, 12, 15);
+    fields->Rn  = BITS(inst, 16, 19);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strexb)(unsigned int inst, int index)
+{   // Decoding is delegated to the strex translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(strex)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strexh)(unsigned int inst, int index)
+{   // Decoding is delegated to the strex translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(strex)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strexd)(unsigned int inst, int index)
+{   // Decoding is delegated to the strex translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(strex)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strh)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst *fields = (ldst_inst *)entry->component;
+    // The payload keeps the raw opcode plus the address callback chosen for it.
+    fields->get_addr = get_calc_addr_op(inst);
+    fields->inst     = inst;
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(strt)(unsigned int inst, int index)
+{
+    arm_inst* inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
+    ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31);
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->inst = inst;
+    // Bits 25..27 pick the post-indexed address callback (2: immediate, 3: scaled reg).
+    switch (BITS(inst, 25, 27)) {
+    case 2:  inst_cream->get_addr = LnSWoUB(ImmediatePostIndexed);      break;
+    case 3:  inst_cream->get_addr = LnSWoUB(ScaledRegisterPostIndexed); break;
+    default:
+        // Any other value would be the thumb form of this instruction.
+        // The 3DS CPU does not support that variant: it is only
+        // ARMv6K, whereas the variant was added in ARMv6T2, so
+        // citra can safely leave it unimplemented.
+        // get_addr is not assigned on this path.
+        DEBUG_MSG;
+    }
+
+    return inst_base;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sub)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sub_inst));
+    sub_inst *fields = (sub_inst *)entry->component;
+    // Translate SUB: fill the sub_inst payload (flags, registers, shifter operand
+    // and its evaluation callback) and then the common arm_inst header.
+    fields->shtop_func      = get_shtop(inst);
+    fields->shifter_operand = BITS(inst, 0, 11);
+    fields->Rd              = BITS(inst, 12, 15);
+    fields->Rn              = BITS(inst, 16, 19);
+    fields->S               = BIT(inst, 20);
+    fields->I               = BIT(inst, 25);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+
+    // Rd == 15 flags the instruction as an indirect branch; otherwise non-branch.
+    entry->br   = (fields->Rd == 15) ? INDIRECT_BRANCH : NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(swi)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(swi_inst));
+    swi_inst *fields = (swi_inst *)entry->component;
+    // The low 24 bits of the opcode are stored as num.
+    fields->num = BITS(inst, 0, 23);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(swp)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(swp_inst));
+    swp_inst *fields = (swp_inst *)entry->component;
+    // SWP payload: the three register numbers Rm, Rd and Rn.
+    fields->Rm  = BITS(inst, 0, 3);
+    fields->Rd  = BITS(inst, 12, 15);
+    fields->Rn  = BITS(inst, 16, 19);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(swpb)(unsigned int inst, int index){
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(swp_inst));
+    swp_inst *fields = (swp_inst *)entry->component;
+    // SWPB uses the swp_inst payload: the three register numbers Rm, Rd and Rn.
+    fields->Rm  = BITS(inst, 0, 3);
+    fields->Rd  = BITS(inst, 12, 15);
+    fields->Rn  = BITS(inst, 16, 19);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sxtab)(unsigned int inst, int index){
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sxtab_inst));
+    sxtab_inst *fields = (sxtab_inst *)entry->component;
+    // SXTAB payload: Rd, Rn, Rm and the 2-bit rotate field.
+    fields->Rn     = BITS(inst, 16, 19);
+    fields->Rd     = BITS(inst, 12, 15);
+    fields->rotate = BITS(inst, 10, 11);
+    fields->Rm     = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sxtab16)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(sxtab_inst));
+    sxtab_inst* const fields = (sxtab_inst*)entry->component;
+    // Also used by the sxtb16 translator; the payload type is sxtab_inst.
+    fields->rotate = BITS(inst, 10, 11);
+    fields->Rd     = BITS(inst, 12, 15);
+    fields->Rn     = BITS(inst, 16, 19);
+    fields->Rm     = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sxtb16)(unsigned int inst, int index)
+{   // Decoding is delegated to the sxtab16 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(sxtab16)(inst, index);
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(sxtah)(unsigned int inst, int index) {
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sxtah_inst));
+    sxtah_inst *fields = (sxtah_inst *)entry->component;
+    // SXTAH payload: Rd, Rn, Rm and the 2-bit rotate field.
+    fields->Rn     = BITS(inst, 16, 19);
+    fields->Rd     = BITS(inst, 12, 15);
+    fields->rotate = BITS(inst, 10, 11);
+    fields->Rm     = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(teq)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(teq_inst));
+    teq_inst *fields = (teq_inst *)entry->component;
+    // TEQ payload: I flag, Rn, the 12-bit shifter operand and its callback.
+    fields->shtop_func      = get_shtop(inst);
+    fields->shifter_operand = BITS(inst, 0, 11);
+    fields->Rn              = BITS(inst, 16, 19);
+    fields->I               = BIT(inst, 25);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(tst)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(tst_inst));
+    tst_inst *fields = (tst_inst *)entry->component;
+    // TST payload: I/S flags, Rn, Rd, the 12-bit shifter operand and its callback.
+    fields->shtop_func      = get_shtop(inst);
+    fields->shifter_operand = BITS(inst, 0, 11);
+    fields->Rd              = BITS(inst, 12, 15);
+    fields->Rn              = BITS(inst, 16, 19);
+    fields->S               = BIT(inst, 20);
+    fields->I               = BIT(inst, 25);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uadd8)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const fields = (generic_arm_inst*)entry->component;
+    // Generic payload: three register numbers plus the op1/op2 selector slices.
+    fields->Rd  = BITS(inst, 12, 15);
+    fields->Rn  = BITS(inst, 16, 19);
+    fields->Rm  = BITS(inst, 0, 3);
+    fields->op2 = BITS(inst, 5, 7);
+    fields->op1 = BITS(inst, 20, 21);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uadd16)(unsigned int inst, int index)
+{   // Decoding is delegated to the uadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uaddsubx)(unsigned int inst, int index)
+{   // Decoding is delegated to the uadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(usub8)(unsigned int inst, int index)
+{   // Decoding is delegated to the uadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(usub16)(unsigned int inst, int index)
+{   // Decoding is delegated to the uadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(usubaddx)(unsigned int inst, int index)
+{   // Decoding is delegated to the uadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uadd8)(inst, index);
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uhadd8)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const fields = (generic_arm_inst*)entry->component;
+    // Generic payload: three register numbers plus the op1/op2 selector slices.
+    fields->Rd  = BITS(inst, 12, 15);
+    fields->Rn  = BITS(inst, 16, 19);
+    fields->Rm  = BITS(inst, 0, 3);
+    fields->op2 = BITS(inst, 5, 7);
+    fields->op1 = BITS(inst, 20, 21);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uhadd16)(unsigned int inst, int index)
+{   // Decoding is delegated to the uhadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uhadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uhaddsubx)(unsigned int inst, int index)
+{   // Decoding is delegated to the uhadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uhadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uhsub8)(unsigned int inst, int index)
+{   // Decoding is delegated to the uhadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uhadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uhsub16)(unsigned int inst, int index)
+{   // Decoding is delegated to the uhadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uhadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uhsubaddx)(unsigned int inst, int index)
+{   // Decoding is delegated to the uhadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uhadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(umaal)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(umaal_inst));
+    umaal_inst* const fields = (umaal_inst*)entry->component;
+    // UMAAL payload: RdHi/RdLo plus Rm (bits 8..11) and Rn (bits 0..3).
+    fields->RdHi = BITS(inst, 16, 19);
+    fields->RdLo = BITS(inst, 12, 15);
+    fields->Rm   = BITS(inst, 8, 11);
+    fields->Rn   = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(umlal)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umlal_inst));
+    umlal_inst *fields = (umlal_inst *)entry->component;
+    // UMLAL payload: S flag, Rm, Rs and the RdHi/RdLo register pair.
+    fields->RdHi = BITS(inst, 16, 19);
+    fields->RdLo = BITS(inst, 12, 15);
+    fields->Rs   = BITS(inst, 8, 11);
+    fields->Rm   = BITS(inst, 0, 3);
+    fields->S    = BIT(inst, 20);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(umull)(unsigned int inst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umull_inst));
+    umull_inst *fields = (umull_inst *)entry->component;
+    // UMULL payload: S flag, Rm, Rs and the RdHi/RdLo register pair.
+    fields->RdHi = BITS(inst, 16, 19);
+    fields->RdLo = BITS(inst, 12, 15);
+    fields->Rs   = BITS(inst, 8, 11);
+    fields->Rm   = BITS(inst, 0, 3);
+    fields->S    = BIT(inst, 20);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(b_2_thumb)(unsigned int tinst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(b_2_thumb));
+    b_2_thumb *fields = (b_2_thumb *)entry->component;
+    // Offset: low 10 bits doubled; when bit 10 is set, bits 11..31 are filled with ones.
+    const unsigned int sign_fill = (tinst & (1 << 10)) ? 0xFFFFF800 : 0;
+    fields->imm = ((tinst & 0x3FF) << 1) | sign_fill;
+
+    entry->br  = DIRECT_BRANCH;
+    entry->idx = index;
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(b_cond_thumb)(unsigned int tinst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(b_cond_thumb));
+    b_cond_thumb *fields = (b_cond_thumb *)entry->component;
+    const unsigned int sign_fill = (tinst & (1 << 7)) ? 0xFFFFFF00 : 0; // ones in bits 8..31 if bit 7 set
+    fields->imm  = ((tinst & 0x7F) << 1) | sign_fill;
+    fields->cond = (tinst >> 8) & 0xf;
+    entry->br    = DIRECT_BRANCH;
+    entry->idx   = index;
+
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(bl_1_thumb)(unsigned int tinst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(bl_1_thumb));
+    bl_1_thumb *fields = (bl_1_thumb *)entry->component;
+    const unsigned int sign_fill = (tinst & (1 << 10)) ? 0xFF800000 : 0; // ones in bits 23..31 if bit 10 set
+    fields->imm = ((tinst & 0x07FF) << 12) | sign_fill;
+
+    entry->br  = NON_BRANCH;
+    entry->idx = index;
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(bl_2_thumb)(unsigned int tinst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(bl_2_thumb));
+    bl_2_thumb *fields = (bl_2_thumb *)entry->component;
+    // imm is the low 11 bits of the halfword shifted left by one (no sign fill).
+    fields->imm = (tinst & 0x07FF) << 1;
+
+    entry->br  = DIRECT_BRANCH;
+    entry->idx = index;
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(blx_1_thumb)(unsigned int tinst, int index)
+{
+    arm_inst *entry = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(blx_1_thumb));
+    blx_1_thumb *fields = (blx_1_thumb *)entry->component;
+    // Keeps the raw halfword in instr alongside imm (low 11 bits shifted left by one).
+    fields->instr = tinst;
+    fields->imm   = (tinst & 0x07FF) << 1;
+
+    entry->br     = DIRECT_BRANCH;
+    entry->idx    = index;
+    return entry;
+}
+
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uqadd8)(unsigned int inst, int index)
+{
+    arm_inst* const entry = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const fields = (generic_arm_inst*)entry->component;
+    // Generic payload: the op1/op2 selector slices plus three register numbers.
+    fields->op1 = BITS(inst, 20, 21);
+    fields->op2 = BITS(inst, 5, 7);
+    fields->Rn  = BITS(inst, 16, 19);
+    fields->Rd  = BITS(inst, 12, 15);
+    fields->Rm  = BITS(inst, 0, 3);
+
+    entry->idx  = index;
+    entry->cond = BITS(inst, 28, 31);
+    entry->br   = NON_BRANCH;
+
+    return entry;
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uqadd16)(unsigned int inst, int index)
+{   // Decoding is delegated to the uqadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uqadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uqaddsubx)(unsigned int inst, int index)
+{   // Decoding is delegated to the uqadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uqadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uqsub8)(unsigned int inst, int index)
+{   // Decoding is delegated to the uqadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uqadd8)(inst, index);
+}
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uqsub16)(unsigned int inst, int index)
+{   // Decoding is delegated to the uqadd8 translator; its result is returned as-is.
+    return INTERPRETER_TRANSLATE(uqadd8)(inst, index);
+}
+// UQSUBADDX: forwards to the uqadd8 translator, which fills a generic_arm_inst from
+// the same bit positions. `index` is passed through, so the record keeps this
+// instruction's own table slot.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uqsubaddx)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(uqadd8)(inst, index);
+}
+// Translator for USADA8; the usad8 translator forwards to it as well.
+//   inst  - raw ARM instruction word.
+//   index - table slot of the instruction being translated, stored in `idx`.
+// Returns a NON_BRANCH record whose generic_arm_inst payload holds Rd, Ra, Rm, Rn
+// and the op1/op2 selector bits extracted from `inst`.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(usada8)(unsigned int inst, int index)
+{
+    arm_inst* const record = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const payload = (generic_arm_inst*)record->component;
+
+    // Record header.
+    record->br   = NON_BRANCH;
+    record->idx  = index;
+    record->cond = BITS(inst, 28, 31);
+
+    // Payload fields, listed from the lowest bit range upwards. Note the register
+    // positions differ from the uqadd8 layout: Rn is the low nibble and Rd sits at
+    // bits 16-19.
+    payload->Rn  = BITS(inst, 0, 3);
+    payload->op2 = BITS(inst, 5, 7);
+    payload->Rm  = BITS(inst, 8, 11);
+    payload->Ra  = BITS(inst, 12, 15);
+    payload->Rd  = BITS(inst, 16, 19);
+    payload->op1 = BITS(inst, 20, 24);
+
+    return record;
+}
+// USAD8: forwards to the usada8 translator, so the same bit fields (including Ra
+// from bits 12-15) are captured. `index` is passed through, so the record keeps
+// this instruction's own table slot.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(usad8)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(usada8)(inst, index);
+}
+// USAT: forwards to the ssat translator (defined elsewhere in this file), passing
+// `inst` and `index` through unchanged.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(usat)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(ssat)(inst, index);
+}
+// USAT16: forwards to the ssat16 translator (defined elsewhere in this file),
+// passing `inst` and `index` through unchanged.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(usat16)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(ssat16)(inst, index);
+}
+
+// Translator for UXTAB16; the uxtb16 translator forwards to it as well.
+//   inst  - raw ARM instruction word.
+//   index - table slot of the instruction being translated, stored in `idx`.
+// Returns a NON_BRANCH record whose uxtab_inst payload holds Rm, Rn, Rd and the
+// two-bit rotate field extracted from `inst`.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uxtab16)(unsigned int inst, int index)
+{
+    arm_inst* const record = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(uxtab_inst));
+    uxtab_inst* const payload = (uxtab_inst*)record->component;
+
+    // Record header.
+    record->br   = NON_BRANCH;
+    record->idx  = index;
+    record->cond = BITS(inst, 28, 31);
+
+    // Payload fields, listed from the lowest bit range upwards.
+    payload->Rm     = BITS(inst, 0, 3);
+    payload->rotate = BITS(inst, 10, 11);
+    payload->Rd     = BITS(inst, 12, 15);
+    payload->Rn     = BITS(inst, 16, 19);
+
+    return record;
+}
+// UXTB16: forwards to the uxtab16 translator, so Rm, Rn, Rd and rotate are captured
+// from the same bit positions. `index` is passed through, so the record keeps this
+// instruction's own table slot.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(uxtb16)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(uxtab16)(inst, index);
+}
+
+// Translator for WFE. Only the record header is allocated; no payload follows it.
+//   inst  - raw ARM instruction word (only the condition bits 28-31 are read).
+//   index - table slot of this instruction, stored in `idx`.
+// Returns the NON_BRANCH record.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(wfe)(unsigned int inst, int index)
+{
+    arm_inst* const record = (arm_inst*)AllocBuffer(sizeof(arm_inst));
+
+    record->br   = NON_BRANCH;
+    record->idx  = index;
+    record->cond = BITS(inst, 28, 31);
+
+    return record;
+}
+// Translator for WFI. Only the record header is allocated; no payload follows it.
+//   inst  - raw ARM instruction word (only the condition bits 28-31 are read).
+//   index - table slot of this instruction, stored in `idx`.
+// Returns the NON_BRANCH record.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(wfi)(unsigned int inst, int index)
+{
+    arm_inst* const record = (arm_inst*)AllocBuffer(sizeof(arm_inst));
+
+    record->br   = NON_BRANCH;
+    record->idx  = index;
+    record->cond = BITS(inst, 28, 31);
+
+    return record;
+}
+// Translator for YIELD. Only the record header is allocated; no payload follows it.
+//   inst  - raw ARM instruction word (only the condition bits 28-31 are read).
+//   index - table slot of this instruction, stored in `idx`.
+// Returns the NON_BRANCH record.
+static ARM_INST_PTR INTERPRETER_TRANSLATE(yield)(unsigned int inst, int index)
+{
+    arm_inst* const record = (arm_inst*)AllocBuffer(sizeof(arm_inst));
+
+    record->br   = NON_BRANCH;
+    record->idx  = index;
+    record->cond = BITS(inst, 28, 31);
+
+    return record;
+}
+
+// Floating point VFPv3 structures and instructions
+
+#define VFP_INTERPRETER_STRUCT
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_INTERPRETER_STRUCT
+
+#define VFP_INTERPRETER_TRANS
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_INTERPRETER_TRANS
+
+// Signature shared by every INTERPRETER_TRANSLATE(...) function: it receives the
+// raw instruction word and its table index and returns the allocated record.
+typedef ARM_INST_PTR (*transop_fp_t)(unsigned int, int);
+
+// Translator dispatch table.
+//
+// InterpreterTranslateInstruction calls arm_instruction_trans[idx](inst, idx) with
+// the index produced by DecodeARMInstruction, and the translators store that index
+// in the record's `idx` field. The GOTO_NEXT_INST switch in InterpreterMainLoop then
+// maps `idx` to a handler label, so entry order is significant: position N in this
+// table corresponds to `case N` in that switch (0-31 VFP, 32-196 ARM, 197-201 Thumb
+// branches). DecodeThumbInstruction addresses the last five entries relative to the
+// table length, so they must stay at the end.
+//
+// Some translators (blx, ldm, stm, msr) appear in more than one slot; the switch has
+// matching duplicate cases.
+// NOTE(review): presumably the order also mirrors the decode table consulted by
+// DecodeARMInstruction, which is not visible here -- confirm before reordering.
+const transop_fp_t arm_instruction_trans[] = {
+    INTERPRETER_TRANSLATE(vmla),
+    INTERPRETER_TRANSLATE(vmls),
+    INTERPRETER_TRANSLATE(vnmla),
+    INTERPRETER_TRANSLATE(vnmls),
+    INTERPRETER_TRANSLATE(vnmul),
+    INTERPRETER_TRANSLATE(vmul),
+    INTERPRETER_TRANSLATE(vadd),
+    INTERPRETER_TRANSLATE(vsub),
+    INTERPRETER_TRANSLATE(vdiv),
+    INTERPRETER_TRANSLATE(vmovi),
+    INTERPRETER_TRANSLATE(vmovr),
+    INTERPRETER_TRANSLATE(vabs),
+    INTERPRETER_TRANSLATE(vneg),
+    INTERPRETER_TRANSLATE(vsqrt),
+    INTERPRETER_TRANSLATE(vcmp),
+    INTERPRETER_TRANSLATE(vcmp2),
+    INTERPRETER_TRANSLATE(vcvtbds),
+    INTERPRETER_TRANSLATE(vcvtbff),
+    INTERPRETER_TRANSLATE(vcvtbfi),
+    INTERPRETER_TRANSLATE(vmovbrs),
+    INTERPRETER_TRANSLATE(vmsr),
+    INTERPRETER_TRANSLATE(vmovbrc),
+    INTERPRETER_TRANSLATE(vmrs),
+    INTERPRETER_TRANSLATE(vmovbcr),
+    INTERPRETER_TRANSLATE(vmovbrrss),
+    INTERPRETER_TRANSLATE(vmovbrrd),
+    INTERPRETER_TRANSLATE(vstr),
+    INTERPRETER_TRANSLATE(vpush),
+    INTERPRETER_TRANSLATE(vstm),
+    INTERPRETER_TRANSLATE(vpop),
+    INTERPRETER_TRANSLATE(vldr),
+    INTERPRETER_TRANSLATE(vldm),
+
+    INTERPRETER_TRANSLATE(srs),
+    INTERPRETER_TRANSLATE(rfe),
+    INTERPRETER_TRANSLATE(bkpt),
+    INTERPRETER_TRANSLATE(blx),
+    INTERPRETER_TRANSLATE(cps),
+    INTERPRETER_TRANSLATE(pld),
+    INTERPRETER_TRANSLATE(setend),
+    INTERPRETER_TRANSLATE(clrex),
+    INTERPRETER_TRANSLATE(rev16),
+    INTERPRETER_TRANSLATE(usad8),
+    INTERPRETER_TRANSLATE(sxtb),
+    INTERPRETER_TRANSLATE(uxtb),
+    INTERPRETER_TRANSLATE(sxth),
+    INTERPRETER_TRANSLATE(sxtb16),
+    INTERPRETER_TRANSLATE(uxth),
+    INTERPRETER_TRANSLATE(uxtb16),
+    INTERPRETER_TRANSLATE(cpy),
+    INTERPRETER_TRANSLATE(uxtab),
+    INTERPRETER_TRANSLATE(ssub8),
+    INTERPRETER_TRANSLATE(shsub8),
+    INTERPRETER_TRANSLATE(ssubaddx),
+    INTERPRETER_TRANSLATE(strex),
+    INTERPRETER_TRANSLATE(strexb),
+    INTERPRETER_TRANSLATE(swp),
+    INTERPRETER_TRANSLATE(swpb),
+    INTERPRETER_TRANSLATE(ssub16),
+    INTERPRETER_TRANSLATE(ssat16),
+    INTERPRETER_TRANSLATE(shsubaddx),
+    INTERPRETER_TRANSLATE(qsubaddx),
+    INTERPRETER_TRANSLATE(shaddsubx),
+    INTERPRETER_TRANSLATE(shadd8),
+    INTERPRETER_TRANSLATE(shadd16),
+    INTERPRETER_TRANSLATE(sel),
+    INTERPRETER_TRANSLATE(saddsubx),
+    INTERPRETER_TRANSLATE(sadd8),
+    INTERPRETER_TRANSLATE(sadd16),
+    INTERPRETER_TRANSLATE(shsub16),
+    INTERPRETER_TRANSLATE(umaal),
+    INTERPRETER_TRANSLATE(uxtab16),
+    INTERPRETER_TRANSLATE(usubaddx),
+    INTERPRETER_TRANSLATE(usub8),
+    INTERPRETER_TRANSLATE(usub16),
+    INTERPRETER_TRANSLATE(usat16),
+    INTERPRETER_TRANSLATE(usada8),
+    INTERPRETER_TRANSLATE(uqsubaddx),
+    INTERPRETER_TRANSLATE(uqsub8),
+    INTERPRETER_TRANSLATE(uqsub16),
+    INTERPRETER_TRANSLATE(uqaddsubx),
+    INTERPRETER_TRANSLATE(uqadd8),
+    INTERPRETER_TRANSLATE(uqadd16),
+    INTERPRETER_TRANSLATE(sxtab),
+    INTERPRETER_TRANSLATE(uhsubaddx),
+    INTERPRETER_TRANSLATE(uhsub8),
+    INTERPRETER_TRANSLATE(uhsub16),
+    INTERPRETER_TRANSLATE(uhaddsubx),
+    INTERPRETER_TRANSLATE(uhadd8),
+    INTERPRETER_TRANSLATE(uhadd16),
+    INTERPRETER_TRANSLATE(uaddsubx),
+    INTERPRETER_TRANSLATE(uadd8),
+    INTERPRETER_TRANSLATE(uadd16),
+    INTERPRETER_TRANSLATE(sxtah),
+    INTERPRETER_TRANSLATE(sxtab16),
+    INTERPRETER_TRANSLATE(qadd8),
+    INTERPRETER_TRANSLATE(bxj),
+    INTERPRETER_TRANSLATE(clz),
+    INTERPRETER_TRANSLATE(uxtah),
+    INTERPRETER_TRANSLATE(bx),
+    INTERPRETER_TRANSLATE(rev),
+    INTERPRETER_TRANSLATE(blx),
+    INTERPRETER_TRANSLATE(revsh),
+    INTERPRETER_TRANSLATE(qadd),
+    INTERPRETER_TRANSLATE(qadd16),
+    INTERPRETER_TRANSLATE(qaddsubx),
+    INTERPRETER_TRANSLATE(ldrex),
+    INTERPRETER_TRANSLATE(qdadd),
+    INTERPRETER_TRANSLATE(qdsub),
+    INTERPRETER_TRANSLATE(qsub),
+    INTERPRETER_TRANSLATE(ldrexb),
+    INTERPRETER_TRANSLATE(qsub8),
+    INTERPRETER_TRANSLATE(qsub16),
+    INTERPRETER_TRANSLATE(smuad),
+    INTERPRETER_TRANSLATE(smmul),
+    INTERPRETER_TRANSLATE(smusd),
+    INTERPRETER_TRANSLATE(smlsd),
+    INTERPRETER_TRANSLATE(smlsld),
+    INTERPRETER_TRANSLATE(smmla),
+    INTERPRETER_TRANSLATE(smmls),
+    INTERPRETER_TRANSLATE(smlald),
+    INTERPRETER_TRANSLATE(smlad),
+    INTERPRETER_TRANSLATE(smlaw),
+    INTERPRETER_TRANSLATE(smulw),
+    INTERPRETER_TRANSLATE(pkhtb),
+    INTERPRETER_TRANSLATE(pkhbt),
+    INTERPRETER_TRANSLATE(smul),
+    INTERPRETER_TRANSLATE(smlalxy),
+    INTERPRETER_TRANSLATE(smla),
+    INTERPRETER_TRANSLATE(mcrr),
+    INTERPRETER_TRANSLATE(mrrc),
+    INTERPRETER_TRANSLATE(cmp),
+    INTERPRETER_TRANSLATE(tst),
+    INTERPRETER_TRANSLATE(teq),
+    INTERPRETER_TRANSLATE(cmn),
+    INTERPRETER_TRANSLATE(smull),
+    INTERPRETER_TRANSLATE(umull),
+    INTERPRETER_TRANSLATE(umlal),
+    INTERPRETER_TRANSLATE(smlal),
+    INTERPRETER_TRANSLATE(mul),
+    INTERPRETER_TRANSLATE(mla),
+    INTERPRETER_TRANSLATE(ssat),
+    INTERPRETER_TRANSLATE(usat),
+    INTERPRETER_TRANSLATE(mrs),
+    INTERPRETER_TRANSLATE(msr),
+    INTERPRETER_TRANSLATE(and),
+    INTERPRETER_TRANSLATE(bic),
+    INTERPRETER_TRANSLATE(ldm),
+    INTERPRETER_TRANSLATE(eor),
+    INTERPRETER_TRANSLATE(add),
+    INTERPRETER_TRANSLATE(rsb),
+    INTERPRETER_TRANSLATE(rsc),
+    INTERPRETER_TRANSLATE(sbc),
+    INTERPRETER_TRANSLATE(adc),
+    INTERPRETER_TRANSLATE(sub),
+    INTERPRETER_TRANSLATE(orr),
+    INTERPRETER_TRANSLATE(mvn),
+    INTERPRETER_TRANSLATE(mov),
+    INTERPRETER_TRANSLATE(stm),
+    INTERPRETER_TRANSLATE(ldm),
+    INTERPRETER_TRANSLATE(ldrsh),
+    INTERPRETER_TRANSLATE(stm),
+    INTERPRETER_TRANSLATE(ldm),
+    INTERPRETER_TRANSLATE(ldrsb),
+    INTERPRETER_TRANSLATE(strd),
+    INTERPRETER_TRANSLATE(ldrh),
+    INTERPRETER_TRANSLATE(strh),
+    INTERPRETER_TRANSLATE(ldrd),
+    INTERPRETER_TRANSLATE(strt),
+    INTERPRETER_TRANSLATE(strbt),
+    INTERPRETER_TRANSLATE(ldrbt),
+    INTERPRETER_TRANSLATE(ldrt),
+    INTERPRETER_TRANSLATE(mrc),
+    INTERPRETER_TRANSLATE(mcr),
+    INTERPRETER_TRANSLATE(msr),
+    INTERPRETER_TRANSLATE(msr),
+    INTERPRETER_TRANSLATE(msr),
+    INTERPRETER_TRANSLATE(msr),
+    INTERPRETER_TRANSLATE(msr),
+    INTERPRETER_TRANSLATE(ldrb),
+    INTERPRETER_TRANSLATE(strb),
+    INTERPRETER_TRANSLATE(ldr),
+    INTERPRETER_TRANSLATE(ldrcond),
+    INTERPRETER_TRANSLATE(str),
+    INTERPRETER_TRANSLATE(cdp),
+    INTERPRETER_TRANSLATE(stc),
+    INTERPRETER_TRANSLATE(ldc),
+    INTERPRETER_TRANSLATE(ldrexd),
+    INTERPRETER_TRANSLATE(strexd),
+    INTERPRETER_TRANSLATE(ldrexh),
+    INTERPRETER_TRANSLATE(strexh),
+    INTERPRETER_TRANSLATE(nop),
+    INTERPRETER_TRANSLATE(yield),
+    INTERPRETER_TRANSLATE(wfe),
+    INTERPRETER_TRANSLATE(wfi),
+    INTERPRETER_TRANSLATE(sev),
+    INTERPRETER_TRANSLATE(swi),
+    INTERPRETER_TRANSLATE(bbl),
+
+    // All Thumb branch translators must stay at the end of the table:
+    // DecodeThumbInstruction locates them as (table length - k).
+    INTERPRETER_TRANSLATE(b_2_thumb),
+    INTERPRETER_TRANSLATE(b_cond_thumb),
+    INTERPRETER_TRANSLATE(bl_1_thumb),
+    INTERPRETER_TRANSLATE(bl_2_thumb),
+    INTERPRETER_TRANSLATE(blx_1_thumb)
+};
+
+// Fetch outcome codes.
+enum {
+    FETCH_SUCCESS = 0,
+    FETCH_FAILURE = 1
+};
+
+// Runs the Thumb translator on `inst` and, when it reports a branch, builds the
+// interpreter record directly from the raw Thumb encoding.
+//
+//   inst, addr    - fetched word and its address, forwarded to the Thumb helpers.
+//   arm_inst      - forwarded to TranslateThumbInstruction (the caller reads it back
+//                   as the equivalent ARM instruction for non-branch results).
+//   inst_size     - forwarded to TranslateThumbInstruction.
+//   ptr_inst_base - receives the record built for a recognised branch encoding.
+//
+// Returns the translator's status, replaced by UNDEFINED when a reported branch has
+// a top-five-bit opcode that is not handled below.
+//
+// NOTE(review): on the "thumb decoder error" path the status stays BRANCH while
+// *ptr_inst_base is left untouched; callers that trust BRANCH will then use whatever
+// value the pointer held before the call.
+static ThumbDecodeStatus DecodeThumbInstruction(u32 inst, u32 addr, u32* arm_inst, u32* inst_size, ARM_INST_PTR* ptr_inst_base) {
+    ThumbDecodeStatus status = TranslateThumbInstruction (addr, inst, arm_inst, inst_size);
+    if (status != ThumbDecodeStatus::BRANCH)
+        return status;
+
+    // The Thumb branch translators sit at the tail of arm_instruction_trans, so their
+    // slots are expressed relative to the table length.
+    const int table_length = sizeof(arm_instruction_trans) / sizeof(transop_fp_t);
+    const u32 tinstr = GetThumbInstruction(inst, addr);
+
+    int slot = -1; // stays negative when no record is to be built
+
+    switch ((tinstr >> 11) & 0x1F) {
+    case 26:
+    case 27: {
+        const u32 cond_bits = tinstr & 0x0F00;
+        if (cond_bits == 0x0E00 || cond_bits == 0x0F00) {
+            LOG_ERROR(Core_ARM11, "thumb decoder error");
+        } else {
+            slot = table_length - 4; // b_cond_thumb
+        }
+        break;
+    }
+    case 28:
+        slot = table_length - 5; // b_2_thumb (unconditional branch)
+        break;
+    case 8:
+    case 29:
+        slot = table_length - 1; // blx_1_thumb
+        break;
+    case 30:
+        slot = table_length - 3; // bl_1_thumb
+        break;
+    case 31:
+        slot = table_length - 2; // bl_2_thumb
+        break;
+    default:
+        status = ThumbDecodeStatus::UNDEFINED;
+        break;
+    }
+
+    if (slot >= 0) {
+        *ptr_inst_base = arm_instruction_trans[slot](tinstr, slot);
+    }
+    return status;
+}
+
+// Result codes of InterpreterTranslateBlock / InterpreterTranslateSingle; the
+// dispatcher in InterpreterMainLoop stops when it sees FETCH_EXCEPTION.
+enum {
+    KEEP_GOING      = 0,
+    FETCH_EXCEPTION = 1
+};
+
+// Fetches one instruction at `phys_addr`, decodes it and builds its interpreter
+// record through arm_instruction_trans.
+//
+//   cpu       - supplies the MemoryRead32 callback and the Thumb flag.
+//   phys_addr - address of the instruction; the word read is aligned down to 4 bytes.
+//   inst_base - receives the record produced by the selected translator.
+//
+// Returns the instruction size: 4 unless the Thumb decoder overwrote it through the
+// pointer it is given.
+//
+// NOTE(review): when DecodeARMInstruction reports FAILURE the decode index is never
+// assigned in this function, yet it is still used to index the table afterwards
+// unless CITRA_IGNORE_EXIT does not return -- confirm what that macro expands to.
+static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, const u32 phys_addr, ARM_INST_PTR& inst_base) {
+    unsigned int length = 4;
+    unsigned int opcode = (*cpu->user_callbacks.MemoryRead32)(phys_addr & 0xFFFFFFFC);
+
+    if (cpu->TFlag) {
+        // Thumb mode: either the decoder builds a branch record itself, or it hands
+        // back an equivalent ARM instruction to be decoded below.
+        u32 equivalent_arm;
+        const ThumbDecodeStatus thumb_status =
+            DecodeThumbInstruction(opcode, phys_addr, &equivalent_arm, &length, &inst_base);
+
+        if (thumb_status == ThumbDecodeStatus::BRANCH)
+            return length;
+
+        opcode = equivalent_arm;
+    }
+
+    int table_slot;
+    const bool decoded = DecodeARMInstruction(opcode, &table_slot) != ARMDecodeStatus::FAILURE;
+    if (!decoded) {
+        LOG_ERROR(Core_ARM11, "Decode failure.\tPC : [0x%x]\tInstruction : %s [%x]", phys_addr, "", opcode);
+        LOG_ERROR(Core_ARM11, "cpsr=0x%x, cpu->TFlag=%d, r15=0x%x", cpu->Cpsr, cpu->TFlag, cpu->Reg[15]);
+        CITRA_IGNORE_EXIT(-1);
+    }
+
+    inst_base = arm_instruction_trans[table_slot](opcode, table_slot);
+    return length;
+}
+
+// Translates consecutive instructions starting at `addr` until one is not a
+// NON_BRANCH, or until the next address lands on a 0x1000 boundary (the last record
+// is then re-tagged END_OF_PAGE).
+//
+//   cpu      - CPU state; the block is registered in cpu->instruction_cache under the
+//              value Reg[15] had on entry.
+//   bb_start - receives the value of `top` sampled before translation begins.
+//   addr     - address of the first instruction to translate.
+//
+// Always returns KEEP_GOING.
+static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) {
+    bb_start = top;
+
+    const u32 entry_pc = cpu->Reg[15];
+    u32 cursor = addr;
+
+    ARM_INST_PTR last_record = nullptr;
+    int translated_count = 0; // number of instructions in the block (not read afterwards)
+    int branch_kind;
+
+    do {
+        const unsigned int step = InterpreterTranslateInstruction(cpu, cursor, last_record);
+        ++translated_count;
+        cursor += step;
+
+        // A block never continues past a page boundary.
+        const bool reached_page_end = (cursor & 0xfff) == 0;
+        if (reached_page_end) {
+            last_record->br = END_OF_PAGE;
+        }
+
+        branch_kind = last_record->br;
+    } while (branch_kind == NON_BRANCH);
+
+    cpu->instruction_cache[entry_pc] = bb_start;
+
+    return KEEP_GOING;
+}
+
+// Translates exactly one instruction at `addr`. A NON_BRANCH result is re-tagged
+// SINGLE_STEP; any other branch classification is left as the translator set it.
+//
+//   cpu      - CPU state; the record is registered in cpu->instruction_cache under
+//              the value Reg[15] had on entry.
+//   bb_start - receives the value of `top` sampled before translation begins.
+//   addr     - address of the instruction to translate.
+//
+// Always returns KEEP_GOING.
+static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) {
+    bb_start = top;
+
+    const u32 entry_pc = cpu->Reg[15];
+
+    ARM_INST_PTR record = nullptr;
+    InterpreterTranslateInstruction(cpu, addr, record);
+
+    if (record->br == NON_BRANCH)
+        record->br = SINGLE_STEP;
+
+    cpu->instruction_cache[entry_pc] = bb_start;
+    return KEEP_GOING;
+}
+
+// Counts the leading zero bits of a 32-bit value; returns 32 for zero.
+// Binary search: each step tests whether the upper 16/8/4/2/1 bits are clear and,
+// if so, adds that width to the count and shifts the remainder to the top.
+static int clz(unsigned int x) {
+    if (x == 0)
+        return 32;
+
+    int zeros = 0;
+    if ((x & 0xFFFF0000u) == 0) { zeros += 16; x <<= 16; }
+    if ((x & 0xFF000000u) == 0) { zeros +=  8; x <<=  8; }
+    if ((x & 0xF0000000u) == 0) { zeros +=  4; x <<=  4; }
+    if ((x & 0xC0000000u) == 0) { zeros +=  2; x <<=  2; }
+    if ((x & 0x80000000u) == 0) { zeros +=  1; }
+    return zeros;
+}
+
+
+unsigned InterpreterMainLoop(ARMul_State* cpu) {
+    #undef RM
+    #undef RS
+
+    #define CRn             inst_cream->crn
+    #define OPCODE_1        inst_cream->opcode_1
+    #define OPCODE_2        inst_cream->opcode_2
+    #define CRm             inst_cream->crm
+    #define RD              cpu->Reg[inst_cream->Rd]
+    #define RD2             cpu->Reg[inst_cream->Rd + 1]
+    #define RN              cpu->Reg[inst_cream->Rn]
+    #define RM              cpu->Reg[inst_cream->Rm]
+    #define RS              cpu->Reg[inst_cream->Rs]
+    #define RDHI            cpu->Reg[inst_cream->RdHi]
+    #define RDLO            cpu->Reg[inst_cream->RdLo]
+    #define LINK_RTN_ADDR   (cpu->Reg[14] = cpu->Reg[15] + 4)
+    #define SET_PC          (cpu->Reg[15] = cpu->Reg[15] + 8 + inst_cream->signed_immed_24)
+    #define SHIFTER_OPERAND inst_cream->shtop_func(cpu, inst_cream->shifter_operand)
+
+    #define FETCH_INST if (inst_base->br != NON_BRANCH) goto DISPATCH; \
+                       inst_base = (arm_inst *)&inst_buf[ptr]
+
+    #define INC_PC(l)   ptr += sizeof(arm_inst) + l
+    #define INC_PC_STUB ptr += sizeof(arm_inst)
+
+#define GDB_BP_CHECK /*none*/
+
+#define GOTO_NEXT_INST \
+    GDB_BP_CHECK; \
+    if (num_instrs >= cpu->NumInstrsToExecute) goto END; \
+    num_instrs++; \
+    switch(inst_base->idx) { \
+    case 0: goto VMLA_INST; \
+    case 1: goto VMLS_INST; \
+    case 2: goto VNMLA_INST; \
+    case 3: goto VNMLS_INST; \
+    case 4: goto VNMUL_INST; \
+    case 5: goto VMUL_INST; \
+    case 6: goto VADD_INST; \
+    case 7: goto VSUB_INST; \
+    case 8: goto VDIV_INST; \
+    case 9: goto VMOVI_INST; \
+    case 10: goto VMOVR_INST; \
+    case 11: goto VABS_INST; \
+    case 12: goto VNEG_INST; \
+    case 13: goto VSQRT_INST; \
+    case 14: goto VCMP_INST; \
+    case 15: goto VCMP2_INST; \
+    case 16: goto VCVTBDS_INST; \
+    case 17: goto VCVTBFF_INST; \
+    case 18: goto VCVTBFI_INST; \
+    case 19: goto VMOVBRS_INST; \
+    case 20: goto VMSR_INST; \
+    case 21: goto VMOVBRC_INST; \
+    case 22: goto VMRS_INST; \
+    case 23: goto VMOVBCR_INST; \
+    case 24: goto VMOVBRRSS_INST; \
+    case 25: goto VMOVBRRD_INST; \
+    case 26: goto VSTR_INST; \
+    case 27: goto VPUSH_INST; \
+    case 28: goto VSTM_INST; \
+    case 29: goto VPOP_INST; \
+    case 30: goto VLDR_INST; \
+    case 31: goto VLDM_INST ; \
+    case 32: goto SRS_INST; \
+    case 33: goto RFE_INST; \
+    case 34: goto BKPT_INST; \
+    case 35: goto BLX_INST; \
+    case 36: goto CPS_INST; \
+    case 37: goto PLD_INST; \
+    case 38: goto SETEND_INST; \
+    case 39: goto CLREX_INST; \
+    case 40: goto REV16_INST; \
+    case 41: goto USAD8_INST; \
+    case 42: goto SXTB_INST; \
+    case 43: goto UXTB_INST; \
+    case 44: goto SXTH_INST; \
+    case 45: goto SXTB16_INST; \
+    case 46: goto UXTH_INST; \
+    case 47: goto UXTB16_INST; \
+    case 48: goto CPY_INST; \
+    case 49: goto UXTAB_INST; \
+    case 50: goto SSUB8_INST; \
+    case 51: goto SHSUB8_INST; \
+    case 52: goto SSUBADDX_INST; \
+    case 53: goto STREX_INST; \
+    case 54: goto STREXB_INST; \
+    case 55: goto SWP_INST; \
+    case 56: goto SWPB_INST; \
+    case 57: goto SSUB16_INST; \
+    case 58: goto SSAT16_INST; \
+    case 59: goto SHSUBADDX_INST; \
+    case 60: goto QSUBADDX_INST; \
+    case 61: goto SHADDSUBX_INST; \
+    case 62: goto SHADD8_INST; \
+    case 63: goto SHADD16_INST; \
+    case 64: goto SEL_INST; \
+    case 65: goto SADDSUBX_INST; \
+    case 66: goto SADD8_INST; \
+    case 67: goto SADD16_INST; \
+    case 68: goto SHSUB16_INST; \
+    case 69: goto UMAAL_INST; \
+    case 70: goto UXTAB16_INST; \
+    case 71: goto USUBADDX_INST; \
+    case 72: goto USUB8_INST; \
+    case 73: goto USUB16_INST; \
+    case 74: goto USAT16_INST; \
+    case 75: goto USADA8_INST; \
+    case 76: goto UQSUBADDX_INST; \
+    case 77: goto UQSUB8_INST; \
+    case 78: goto UQSUB16_INST; \
+    case 79: goto UQADDSUBX_INST; \
+    case 80: goto UQADD8_INST; \
+    case 81: goto UQADD16_INST; \
+    case 82: goto SXTAB_INST; \
+    case 83: goto UHSUBADDX_INST; \
+    case 84: goto UHSUB8_INST; \
+    case 85: goto UHSUB16_INST; \
+    case 86: goto UHADDSUBX_INST; \
+    case 87: goto UHADD8_INST; \
+    case 88: goto UHADD16_INST; \
+    case 89: goto UADDSUBX_INST; \
+    case 90: goto UADD8_INST; \
+    case 91: goto UADD16_INST; \
+    case 92: goto SXTAH_INST; \
+    case 93: goto SXTAB16_INST; \
+    case 94: goto QADD8_INST; \
+    case 95: goto BXJ_INST; \
+    case 96: goto CLZ_INST; \
+    case 97: goto UXTAH_INST; \
+    case 98: goto BX_INST; \
+    case 99: goto REV_INST; \
+    case 100: goto BLX_INST; \
+    case 101: goto REVSH_INST; \
+    case 102: goto QADD_INST; \
+    case 103: goto QADD16_INST; \
+    case 104: goto QADDSUBX_INST; \
+    case 105: goto LDREX_INST; \
+    case 106: goto QDADD_INST; \
+    case 107: goto QDSUB_INST; \
+    case 108: goto QSUB_INST; \
+    case 109: goto LDREXB_INST; \
+    case 110: goto QSUB8_INST; \
+    case 111: goto QSUB16_INST; \
+    case 112: goto SMUAD_INST; \
+    case 113: goto SMMUL_INST; \
+    case 114: goto SMUSD_INST; \
+    case 115: goto SMLSD_INST; \
+    case 116: goto SMLSLD_INST; \
+    case 117: goto SMMLA_INST; \
+    case 118: goto SMMLS_INST; \
+    case 119: goto SMLALD_INST; \
+    case 120: goto SMLAD_INST; \
+    case 121: goto SMLAW_INST; \
+    case 122: goto SMULW_INST; \
+    case 123: goto PKHTB_INST; \
+    case 124: goto PKHBT_INST; \
+    case 125: goto SMUL_INST; \
+    case 126: goto SMLALXY_INST; \
+    case 127: goto SMLA_INST; \
+    case 128: goto MCRR_INST; \
+    case 129: goto MRRC_INST; \
+    case 130: goto CMP_INST; \
+    case 131: goto TST_INST; \
+    case 132: goto TEQ_INST; \
+    case 133: goto CMN_INST; \
+    case 134: goto SMULL_INST; \
+    case 135: goto UMULL_INST; \
+    case 136: goto UMLAL_INST; \
+    case 137: goto SMLAL_INST; \
+    case 138: goto MUL_INST; \
+    case 139: goto MLA_INST; \
+    case 140: goto SSAT_INST; \
+    case 141: goto USAT_INST; \
+    case 142: goto MRS_INST; \
+    case 143: goto MSR_INST; \
+    case 144: goto AND_INST; \
+    case 145: goto BIC_INST; \
+    case 146: goto LDM_INST; \
+    case 147: goto EOR_INST; \
+    case 148: goto ADD_INST; \
+    case 149: goto RSB_INST; \
+    case 150: goto RSC_INST; \
+    case 151: goto SBC_INST; \
+    case 152: goto ADC_INST; \
+    case 153: goto SUB_INST; \
+    case 154: goto ORR_INST; \
+    case 155: goto MVN_INST; \
+    case 156: goto MOV_INST; \
+    case 157: goto STM_INST; \
+    case 158: goto LDM_INST; \
+    case 159: goto LDRSH_INST; \
+    case 160: goto STM_INST; \
+    case 161: goto LDM_INST; \
+    case 162: goto LDRSB_INST; \
+    case 163: goto STRD_INST; \
+    case 164: goto LDRH_INST; \
+    case 165: goto STRH_INST; \
+    case 166: goto LDRD_INST; \
+    case 167: goto STRT_INST; \
+    case 168: goto STRBT_INST; \
+    case 169: goto LDRBT_INST; \
+    case 170: goto LDRT_INST; \
+    case 171: goto MRC_INST; \
+    case 172: goto MCR_INST; \
+    case 173: goto MSR_INST; \
+    case 174: goto MSR_INST; \
+    case 175: goto MSR_INST; \
+    case 176: goto MSR_INST; \
+    case 177: goto MSR_INST; \
+    case 178: goto LDRB_INST; \
+    case 179: goto STRB_INST; \
+    case 180: goto LDR_INST; \
+    case 181: goto LDRCOND_INST ; \
+    case 182: goto STR_INST; \
+    case 183: goto CDP_INST; \
+    case 184: goto STC_INST; \
+    case 185: goto LDC_INST; \
+    case 186: goto LDREXD_INST; \
+    case 187: goto STREXD_INST; \
+    case 188: goto LDREXH_INST; \
+    case 189: goto STREXH_INST; \
+    case 190: goto NOP_INST; \
+    case 191: goto YIELD_INST; \
+    case 192: goto WFE_INST; \
+    case 193: goto WFI_INST; \
+    case 194: goto SEV_INST; \
+    case 195: goto SWI_INST; \
+    case 196: goto BBL_INST; \
+    case 197: goto B_2_THUMB ; \
+    case 198: goto B_COND_THUMB ; \
+    case 199: goto BL_1_THUMB ; \
+    case 200: goto BL_2_THUMB ; \
+    case 201: goto BLX_1_THUMB ; \
+    case 202: goto DISPATCH; \
+    case 203: goto INIT_INST_LENGTH; \
+    case 204: goto END; \
+    }
+
+    #define UPDATE_NFLAG(dst)    (cpu->NFlag = BIT(dst, 31) ? 1 : 0)
+    #define UPDATE_ZFLAG(dst)    (cpu->ZFlag = dst ? 0 : 1)
+    #define UPDATE_CFLAG_WITH_SC (cpu->CFlag = cpu->shifter_carry_out)
+
+    #define SAVE_NZCVT cpu->Cpsr = (cpu->Cpsr & 0x0fffffdf) | \
+                      (cpu->NFlag << 31) | \
+                      (cpu->ZFlag << 30) | \
+                      (cpu->CFlag << 29) | \
+                      (cpu->VFlag << 28) | \
+                      (cpu->TFlag << 5)
+    #define LOAD_NZCVT cpu->NFlag = (cpu->Cpsr >> 31);     \
+                       cpu->ZFlag = (cpu->Cpsr >> 30) & 1; \
+                       cpu->CFlag = (cpu->Cpsr >> 29) & 1; \
+                       cpu->VFlag = (cpu->Cpsr >> 28) & 1; \
+                       cpu->TFlag = (cpu->Cpsr >> 5) & 1;
+
+    #define CurrentModeHasSPSR (cpu->Mode != SYSTEM32MODE) && (cpu->Mode != USER32MODE)
+    #define PC (cpu->Reg[15])
+
+    arm_inst* inst_base;
+    unsigned int addr;
+    unsigned int num_instrs = 0;
+
+    int ptr;
+
+    LOAD_NZCVT;
+    DISPATCH:
+    {
+        if (num_instrs >= cpu->NumInstrsToExecute)
+            goto END;
+
+        if (!cpu->NirqSig) {
+            if (!(cpu->Cpsr & 0x80)) {
+                goto END;
+            }
+        }
+
+        if (cpu->TFlag)
+            cpu->Reg[15] &= 0xfffffffe;
+        else
+            cpu->Reg[15] &= 0xfffffffc;
+
+        // Find the cached instruction cream, otherwise translate it...
+        auto itr = cpu->instruction_cache.find(cpu->Reg[15]);
+        if (itr != cpu->instruction_cache.end()) {
+            ptr = itr->second;
+        } else if (cpu->NumInstrsToExecute != 1) {
+            if (InterpreterTranslateBlock(cpu, ptr, cpu->Reg[15]) == FETCH_EXCEPTION)
+                goto END;
+        } else {
+            if (InterpreterTranslateSingle(cpu, ptr, cpu->Reg[15]) == FETCH_EXCEPTION)
+                goto END;
+        }
+
+        inst_base = (arm_inst *)&inst_buf[ptr];
+        GOTO_NEXT_INST;
+    }
+    ADC_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            adc_inst* const inst_cream = (adc_inst*)inst_base->component;
+
+            u32 rn_val = RN;
+            if (inst_cream->Rn == 15)
+                rn_val += 2 * cpu->GetInstructionSize();
+
+            bool carry;
+            bool overflow;
+            RD = AddWithCarry(rn_val, SHIFTER_OPERAND, cpu->CFlag, &carry, &overflow);
+
+            if (inst_cream->S && (inst_cream->Rd == 15)) {
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT;
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                cpu->CFlag = carry;
+                cpu->VFlag = overflow;
+            }
+            if (inst_cream->Rd == 15) {
+                INC_PC(sizeof(adc_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(adc_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    ADD_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            add_inst* const inst_cream = (add_inst*)inst_base->component;
+
+            u32 rn_val = RN;
+            if (inst_cream->Rn == 15)
+                rn_val += 2 * cpu->GetInstructionSize();
+
+            bool carry;
+            bool overflow;
+            RD = AddWithCarry(rn_val, SHIFTER_OPERAND, 0, &carry, &overflow);
+
+            if (inst_cream->S && (inst_cream->Rd == 15)) {
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Cpsr & 0x1F);
+                    LOAD_NZCVT;
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                cpu->CFlag = carry;
+                cpu->VFlag = overflow;
+            }
+            if (inst_cream->Rd == 15) {
+                INC_PC(sizeof(add_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(add_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    AND_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            and_inst* const inst_cream = (and_inst*)inst_base->component;
+
+            u32 lop = RN;
+            u32 rop = SHIFTER_OPERAND;
+
+            if (inst_cream->Rn == 15)
+                lop += 2 * cpu->GetInstructionSize();
+
+            RD = lop & rop;
+
+            if (inst_cream->S && (inst_cream->Rd == 15)) {
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Cpsr & 0x1F);
+                    LOAD_NZCVT;
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                UPDATE_CFLAG_WITH_SC;
+            }
+            if (inst_cream->Rd == 15) {
+                INC_PC(sizeof(and_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(and_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    BBL_INST:
+    {
+        if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+            bbl_inst *inst_cream = (bbl_inst *)inst_base->component;
+            if (inst_cream->L) {
+                LINK_RTN_ADDR;
+            }
+            SET_PC;
+            INC_PC(sizeof(bbl_inst));
+            goto DISPATCH;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(bbl_inst));
+        goto DISPATCH;
+    }
+    BIC_INST:
+    {
+        bic_inst *inst_cream = (bic_inst *)inst_base->component;
+        if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+            u32 lop = RN;
+            if (inst_cream->Rn == 15) {
+                lop += 2 * cpu->GetInstructionSize();
+            }
+            u32 rop = SHIFTER_OPERAND;
+            RD = lop & (~rop);
+            if ((inst_cream->S) && (inst_cream->Rd == 15)) {
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT;
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                UPDATE_CFLAG_WITH_SC;
+            }
+            if (inst_cream->Rd == 15) {
+                INC_PC(sizeof(bic_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(bic_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    BKPT_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            bkpt_inst* const inst_cream = (bkpt_inst*)inst_base->component;
+            LOG_DEBUG(Core_ARM11, "Breakpoint instruction hit. Immediate: 0x%08X", inst_cream->imm);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(bkpt_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    BLX_INST: // Branch with link and exchange: register form (BLX Rm) and immediate form.
+    {
+        blx_inst *inst_cream = (blx_inst *)inst_base->component;
+        if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+            unsigned int inst = inst_cream->inst;
+            if (BITS(inst, 20, 27) == 0x12 && BITS(inst, 4, 7) == 0x3) {
+                // Register form. Read the target before LR is written so that "BLX LR" (Rm == 14) uses the old LR.
+                const u32 jump_address = cpu->Reg[inst_cream->val.Rm];
+                cpu->Reg[14] = (cpu->Reg[15] + cpu->GetInstructionSize()) | (cpu->TFlag ? 0x1 : 0x0); // bit 0 keeps the caller's TFlag
+                cpu->Reg[15] = jump_address & 0xfffffffe;
+                cpu->TFlag = jump_address & 0x1; // bit 0 of the target selects the new TFlag value
+            } else {
+                // Immediate form: link, force TFlag to 1 and branch PC-relative.
+                cpu->Reg[14] = (cpu->Reg[15] + cpu->GetInstructionSize());
+                cpu->TFlag = 0x1;
+                int signed_int = inst_cream->val.signed_immed_24;
+                signed_int = (signed_int & 0x800000) ? (0x3F000000 | signed_int) : signed_int; // sign-extend the 24-bit offset (completed by the << 2 below)
+                signed_int = signed_int << 2;
+                cpu->Reg[15] = cpu->Reg[15] + 8 + signed_int + (BIT(inst, 24) << 1); // bit 24 adds a halfword (2-byte) offset
+            }
+            INC_PC(sizeof(blx_inst));
+            goto DISPATCH; // Reg[15] was rewritten, so go through DISPATCH instead of fetching the next decoded instruction
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // condition failed: just step over the instruction
+        INC_PC(sizeof(blx_inst));
+        goto DISPATCH;
+    }
+
+    BX_INST:
+    BXJ_INST: // BXJ shares the BX handler; see the note below.
+    {
+        // Note that only the 'fail' case of BXJ is emulated. This is because
+        // the facilities for Jazelle emulation are not implemented.
+        //
+        // According to the ARM documentation on BXJ, if setting the J bit in the APSR
+        // fails, then BXJ functions identically like a regular BX instruction.
+        //
+        // This is sufficient for citra, as the CPU for the 3DS does not implement Jazelle.
+
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            bx_inst* const inst_cream = (bx_inst*)inst_base->component;
+
+            u32 address = RM;
+
+            if (inst_cream->Rm == 15) // Rm is r15: add two instruction sizes to the stored value
+                address += 2 * cpu->GetInstructionSize();
+
+            cpu->TFlag   = address & 1; // bit 0 of the target selects the new TFlag value
+            cpu->Reg[15] = address & 0xfffffffe;
+            INC_PC(sizeof(bx_inst));
+            goto DISPATCH; // Reg[15] was rewritten, so go through DISPATCH
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize(); // condition failed: just step over the instruction
+        INC_PC(sizeof(bx_inst));
+        goto DISPATCH;
+    }
+
+    CDP_INST: // CDP is not emulated: when its condition passes, execution stops here.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            // Undefined instruction here
+            cpu->NumInstrsToExecute = 0;
+            return num_instrs; // returns from the enclosing function without advancing Reg[15]
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // condition failed: step over the instruction
+        INC_PC(sizeof(cdp_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    CLREX_INST: // Clears the exclusive-access address; there is no condition check in this handler.
+    {
+        cpu->UnsetExclusiveMemoryAddress();
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(clrex_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    CLZ_INST: // Rd = clz(Rm); no flags are touched.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            clz_inst* inst_cream = (clz_inst*)inst_base->component;
+            RD = clz(RM); // presumably counts leading zero bits - helper defined outside this view; confirm clz(0)
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(clz_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    CMN_INST: // Compare negative: sets N, Z, C, V from Rn + shifter operand; no register is written.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            cmn_inst* const inst_cream = (cmn_inst*)inst_base->component;
+
+            u32 rn_val = RN;
+            if (inst_cream->Rn == 15) // Rn is r15: add two instruction sizes to the stored value
+                rn_val += 2 * cpu->GetInstructionSize();
+
+            bool carry;
+            bool overflow;
+            u32 result = AddWithCarry(rn_val, SHIFTER_OPERAND, 0, &carry, &overflow); // carry-in 0; assumes the helper fills carry/overflow - TODO confirm
+
+            UPDATE_NFLAG(result);
+            UPDATE_ZFLAG(result);
+            cpu->CFlag = carry;
+            cpu->VFlag = overflow;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(cmn_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    CMP_INST: // Compare: sets N, Z, C, V from Rn - shifter operand; no register is written.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            cmp_inst* const inst_cream = (cmp_inst*)inst_base->component;
+
+            u32 rn_val = RN;
+            if (inst_cream->Rn == 15) // Rn is r15: add two instruction sizes to the stored value
+                rn_val += 2 * cpu->GetInstructionSize();
+
+            bool carry;
+            bool overflow;
+            u32 result = AddWithCarry(rn_val, ~SHIFTER_OPERAND, 1, &carry, &overflow); // rn + ~op + 1 == rn - op; assumes the helper adds its third argument as carry-in - TODO confirm
+
+            UPDATE_NFLAG(result);
+            UPDATE_ZFLAG(result);
+            cpu->CFlag = carry;
+            cpu->VFlag = overflow;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(cmp_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    CPS_INST: // Change processor state: updates Cpsr bits 8/7/6 and/or the mode field; no condition check here.
+    {
+        cps_inst *inst_cream = (cps_inst *)inst_base->component;
+        u32 aif_val = 0;  // new values for the selected Cpsr bits
+        u32 aif_mask = 0; // which of Cpsr bits 8, 7, 6 are being rewritten
+        if (cpu->InAPrivilegedMode()) { // outside a privileged mode the instruction changes nothing
+            if (inst_cream->imod1) {
+                if (inst_cream->A) {
+                    aif_val |= (inst_cream->imod0 << 8); // imod0 supplies the new bit value
+                    aif_mask |= 1 << 8;
+                }
+                if (inst_cream->I) {
+                    aif_val |= (inst_cream->imod0 << 7);
+                    aif_mask |= 1 << 7;
+                }
+                if (inst_cream->F) {
+                    aif_val |= (inst_cream->imod0 << 6);
+                    aif_mask |= 1 << 6;
+                }
+                aif_mask = ~aif_mask; // invert so the selected bits are cleared before OR-ing in aif_val
+                cpu->Cpsr = (cpu->Cpsr & aif_mask) | aif_val;
+            }
+            if (inst_cream->mmod) {
+                cpu->Cpsr = (cpu->Cpsr & 0xffffffe0) | inst_cream->mode; // replace the low five bits with the new mode
+                cpu->ChangePrivilegeMode(inst_cream->mode);
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(cps_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    CPY_INST: // Register copy: Rd = shifter operand, with no flag handling.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mov_inst* inst_cream = (mov_inst*)inst_base->component; // CPY reuses the mov_inst layout
+
+            RD = SHIFTER_OPERAND;
+            if (inst_cream->Rd == 15) { // r15 was written: go through DISPATCH
+                INC_PC(sizeof(mov_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mov_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    EOR_INST: // Rd = Rn ^ shifter operand, with optional flag update (S).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            eor_inst* inst_cream = (eor_inst*)inst_base->component;
+
+            u32 lop = RN;
+            if (inst_cream->Rn == 15) { // Rn is r15: add two instruction sizes to the stored value
+                lop += 2 * cpu->GetInstructionSize();
+            }
+            u32 rop = SHIFTER_OPERAND;
+            RD = lop ^ rop;
+            if (inst_cream->S && (inst_cream->Rd == 15)) { // S with Rd == r15: restore Cpsr from Spsr_copy instead of setting flags
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT; // presumably reloads the cached flag fields from Cpsr - macro defined outside this view
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                UPDATE_CFLAG_WITH_SC; // presumably takes C from the shifter carry-out - macro defined outside this view
+            }
+            if (inst_cream->Rd == 15) { // r15 was written: go through DISPATCH
+                INC_PC(sizeof(eor_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(eor_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDC_INST: // Stub: LDC performs no load here; the handler only advances to the next instruction.
+    {
+        // Instruction not implemented
+        //LOG_CRITICAL(Core_ARM11, "unimplemented instruction");
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldc_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDM_INST: // Load multiple: three variants selected by instruction bits 22 and 15.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr); // presumably fills addr (declared outside this handler) with the start address
+
+            unsigned int inst = inst_cream->inst;
+            if (BIT(inst, 22) && !BIT(inst, 15)) { // bit 22 set, r15 not in list: r13/r14 may target Reg_usr
+                for (int i = 0; i < 13; i++) { // r0-r12 always go to the current register file
+                    if(BIT(inst, i)) {
+                        cpu->Reg[i] = cpu->ReadMemory32(addr);
+                        addr += 4;
+                    }
+                }
+                if (BIT(inst, 13)) {
+                    if (cpu->Mode == USER32MODE)
+                        cpu->Reg[13] = cpu->ReadMemory32(addr);
+                    else
+                        cpu->Reg_usr[0] = cpu->ReadMemory32(addr); // Reg_usr[0] stands in for r13 outside USER32MODE
+
+                    addr += 4;
+                }
+                if (BIT(inst, 14)) {
+                    if (cpu->Mode == USER32MODE)
+                        cpu->Reg[14] = cpu->ReadMemory32(addr);
+                    else
+                        cpu->Reg_usr[1] = cpu->ReadMemory32(addr); // Reg_usr[1] stands in for r14 outside USER32MODE
+
+                    addr += 4;
+                }
+            } else if (!BIT(inst, 22)) { // plain LDM: every listed register, r15 included
+                for(int i = 0; i < 16; i++ ){
+                    if(BIT(inst, i)){
+                        unsigned int ret = cpu->ReadMemory32(addr);
+
+                        // For armv5t, should enter thumb when bits[0] is non-zero.
+                        if(i == 15){
+                            cpu->TFlag = ret & 0x1;
+                            ret &= 0xFFFFFFFE;
+                        }
+
+                        cpu->Reg[i] = ret;
+                        addr += 4;
+                    }
+                }
+            } else if (BIT(inst, 22) && BIT(inst, 15)) { // bit 22 set with r15 in list: also restores Cpsr from Spsr_copy
+                for(int i = 0; i < 15; i++ ){
+                    if(BIT(inst, i)){
+                        cpu->Reg[i] = cpu->ReadMemory32(addr);
+                        addr += 4;
+                     }
+                 }
+
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Cpsr & 0x1F);
+                    LOAD_NZCVT; // presumably reloads the cached flag fields from Cpsr - macro defined outside this view
+                }
+
+                cpu->Reg[15] = cpu->ReadMemory32(addr); // r15 is loaded last, after the mode switch
+            }
+
+            if (BIT(inst, 15)) { // r15 was loaded: go through DISPATCH
+                INC_PC(sizeof(ldst_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    SXTH_INST: // Rd = sign-extended low halfword of (Rm rotated right by 8 * rotate bits).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            sxth_inst* inst_cream = (sxth_inst*)inst_base->component;
+
+            unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate);
+            if (BIT(operand2, 15)) { // halfword sign bit set: fill the upper 16 bits with ones
+                operand2 |= 0xffff0000;
+            } else {
+                operand2 &= 0xffff;
+            }
+            RD = operand2;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(sxth_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDR_INST: // Word load with no condition check in this handler (LDRCOND_INST is the checked variant).
+    {
+        ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
+        inst_cream->get_addr(cpu, inst_cream->inst, addr); // presumably fills addr (declared outside this handler)
+
+        unsigned int value = cpu->ReadMemory32(addr);
+        cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; // instruction bits 12-15 select the destination register
+
+        if (BITS(inst_cream->inst, 12, 15) == 15) {
+            // For armv5t, should enter thumb when bits[0] is non-zero.
+            cpu->TFlag = value & 0x1;
+            cpu->Reg[15] &= 0xFFFFFFFE;
+            INC_PC(sizeof(ldst_inst));
+            goto DISPATCH; // r15 was loaded: go through DISPATCH
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDRCOND_INST: // Word load performed only when CondPassed() holds for the instruction's condition.
+    {
+        if (CondPassed(cpu, inst_base->cond)) {
+            ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr); // presumably fills addr (declared outside this handler)
+
+            unsigned int value = cpu->ReadMemory32(addr);
+            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value; // instruction bits 12-15 select the destination register
+
+            if (BITS(inst_cream->inst, 12, 15) == 15) {
+                // For armv5t, should enter thumb when bits[0] is non-zero.
+                cpu->TFlag = value & 0x1;
+                cpu->Reg[15] &= 0xFFFFFFFE;
+                INC_PC(sizeof(ldst_inst));
+                goto DISPATCH; // r15 was loaded: go through DISPATCH
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    UXTH_INST: // Rd = low halfword of (Rm rotated right by 8 * rotate bits), upper 16 bits cleared.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            uxth_inst* inst_cream = (uxth_inst*)inst_base->component;
+            RD = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xffff;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(uxth_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    UXTAH_INST: // Rd = Rn + low halfword of (Rm rotated right by 8 * rotate bits).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            uxtah_inst* inst_cream = (uxtah_inst*)inst_base->component;
+            unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xffff; // masked to 16 bits before the add
+
+            RD = RN + operand2;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(uxtah_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDRB_INST: // Byte load into the register selected by instruction bits 12-15.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr); // presumably fills addr (declared outside this handler)
+
+            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = cpu->ReadMemory8(addr);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDRBT_INST: // Byte load performed while temporarily switched to USER32MODE.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr); // address is computed before the mode switch
+
+            const u32 dest_index = BITS(inst_cream->inst, 12, 15);
+            const u32 previous_mode = cpu->Mode;
+
+            cpu->ChangePrivilegeMode(USER32MODE);
+            const u8 value = cpu->ReadMemory8(addr);
+            cpu->ChangePrivilegeMode(previous_mode); // restore the original mode before writing the destination
+
+            cpu->Reg[dest_index] = value;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDRD_INST: // Doubleword load: two 32-bit reads into the register pair starting at instruction bits 12-15.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            // Should check if RD is even-numbered, Rd != 14, addr[0:1] == 0, (CP15_reg1_U == 1 || addr[2] == 0)
+            inst_cream->get_addr(cpu, inst_cream->inst, addr);
+
+            // The 3DS doesn't have LPAE (Large Physical Access Extension), so it
+            // wouldn't do this as a single read.
+            cpu->Reg[BITS(inst_cream->inst, 12, 15) + 0] = cpu->ReadMemory32(addr);
+            cpu->Reg[BITS(inst_cream->inst, 12, 15) + 1] = cpu->ReadMemory32(addr + 4); // NOTE(review): index is 16 when bits 12-15 == 15; nothing here rejects that
+
+            // No dispatch since this operation should not modify R15
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // same PC step as the other handlers instead of a literal 4
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    LDREX_INST: // Exclusive word load: records the address, then Rd = word at [Rn].
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+            unsigned int read_addr = RN;
+
+            cpu->SetExclusiveMemoryAddress(read_addr); // recorded before the read
+
+            RD = cpu->ReadMemory32(read_addr);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDREXB_INST: // Exclusive byte load: records the address, then Rd = byte at [Rn].
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+            unsigned int read_addr = RN;
+
+            cpu->SetExclusiveMemoryAddress(read_addr); // recorded before the read
+
+            RD = cpu->ReadMemory8(read_addr);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDREXH_INST: // Exclusive halfword load: records the address, then Rd = halfword at [Rn].
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+            unsigned int read_addr = RN;
+
+            cpu->SetExclusiveMemoryAddress(read_addr); // recorded before the read
+
+            RD = cpu->ReadMemory16(read_addr);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDREXD_INST: // Exclusive doubleword load: records the address, then reads two consecutive words.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+            unsigned int read_addr = RN;
+
+            cpu->SetExclusiveMemoryAddress(read_addr); // only the first word's address is recorded
+
+            RD  = cpu->ReadMemory32(read_addr);
+            RD2 = cpu->ReadMemory32(read_addr + 4); // RD2 presumably names the second destination register - macro defined outside this view
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDRH_INST: // Halfword load into the register selected by instruction bits 12-15.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr); // presumably fills addr (declared outside this handler)
+
+            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = cpu->ReadMemory16(addr);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDRSB_INST: // Byte load, sign-extended to 32 bits.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr); // presumably fills addr (declared outside this handler)
+            unsigned int value = cpu->ReadMemory8(addr);
+            if (BIT(value, 7)) { // byte sign bit set: fill the upper 24 bits with ones
+                value |= 0xffffff00;
+            }
+            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDRSH_INST: // Halfword load, sign-extended to 32 bits.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr); // presumably fills addr (declared outside this handler)
+
+            unsigned int value = cpu->ReadMemory16(addr);
+            if (BIT(value, 15)) { // halfword sign bit set: fill the upper 16 bits with ones
+                value |= 0xffff0000;
+            }
+            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDRT_INST: // Word load performed while temporarily switched to USER32MODE.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr); // address is computed before the mode switch
+
+            const u32 dest_index = BITS(inst_cream->inst, 12, 15);
+            const u32 previous_mode = cpu->Mode;
+
+            cpu->ChangePrivilegeMode(USER32MODE);
+            const u32 value = cpu->ReadMemory32(addr);
+            cpu->ChangePrivilegeMode(previous_mode); // restore the original mode before writing the destination
+
+            cpu->Reg[dest_index] = value;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    MCR_INST: // Register-to-coprocessor move: only coprocessor 15 is written; other cp_num values are ignored.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mcr_inst* inst_cream = (mcr_inst*)inst_base->component;
+
+            unsigned int inst = inst_cream->inst; // NOTE(review): may be consumed by the CRn/OPCODE_x macros - confirm before removing
+            if (inst_cream->Rd == 15) {
+                DEBUG_MSG; // Rd == r15 performs no write
+            } else {
+                if (inst_cream->cp_num == 15)
+                    cpu->WriteCP15Register(RD, CRn, OPCODE_1, CRm, OPCODE_2);
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mcr_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    MCRR_INST: // Stub: logs the decoded fields and moves no data.
+    {
+        // Stubbed, as the MPCore doesn't have any registers that are accessible
+        // through this instruction.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mcrr_inst* const inst_cream = (mcrr_inst*)inst_base->component;
+
+            LOG_ERROR(Core_ARM11, "MCRR executed | Coprocessor: %u, CRm %u, opc1: %u, Rt: %u, Rt2: %u",
+                      inst_cream->cp_num, inst_cream->crm, inst_cream->opcode_1, inst_cream->rt, inst_cream->rt2);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mcrr_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    MLA_INST: // Multiply-accumulate: Rd = low 32 bits of (Rm * Rs + Rn).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mla_inst* inst_cream = (mla_inst*)inst_base->component;
+
+            u64 rm = RM; // operands are widened to 64 bits before the arithmetic
+            u64 rs = RS;
+            u64 rn = RN;
+
+            RD = static_cast<u32>((rm * rs + rn) & 0xffffffff);
+            if (inst_cream->S) { // S form updates only N and Z here
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mla_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    MOV_INST: // Rd = shifter operand, with optional flag update (S).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mov_inst* inst_cream = (mov_inst*)inst_base->component;
+
+            RD = SHIFTER_OPERAND;
+            if (inst_cream->S && (inst_cream->Rd == 15)) { // S with Rd == r15: restore Cpsr from Spsr_copy instead of setting flags
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT; // presumably reloads the cached flag fields from Cpsr - macro defined outside this view
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                UPDATE_CFLAG_WITH_SC; // presumably takes C from the shifter carry-out - macro defined outside this view
+            }
+            if (inst_cream->Rd == 15) { // r15 was written: go through DISPATCH
+                INC_PC(sizeof(mov_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mov_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    MRC_INST: // Coprocessor-to-register move: only coprocessor 15 is read; other cp_num values are ignored.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mrc_inst* inst_cream = (mrc_inst*)inst_base->component;
+
+            if (inst_cream->cp_num == 15) {
+                const uint32_t value = cpu->ReadCP15Register(CRn, OPCODE_1, CRm, OPCODE_2);
+
+                if (inst_cream->Rd == 15) { // Rd == r15: only the top four bits of value are copied, into Cpsr
+                    cpu->Cpsr = (cpu->Cpsr & ~0xF0000000) | (value & 0xF0000000);
+                    LOAD_NZCVT; // presumably reloads the cached flag fields from Cpsr - macro defined outside this view
+                } else {
+                    RD = value;
+                }
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mrc_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    MRRC_INST: // Stub: logs the decoded fields and moves no data.
+    {
+        // Stubbed, as the MPCore doesn't have any registers that are accessible
+        // through this instruction.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mcrr_inst* const inst_cream = (mcrr_inst*)inst_base->component; // MRRC reuses the mcrr_inst layout
+
+            LOG_ERROR(Core_ARM11, "MRRC executed | Coprocessor: %u, CRm %u, opc1: %u, Rt: %u, Rt2: %u",
+                      inst_cream->cp_num, inst_cream->crm, inst_cream->opcode_1, inst_cream->rt, inst_cream->rt2);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mcrr_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    MRS_INST: // Status register read: Rd = Spsr_copy when R is set, otherwise Cpsr.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mrs_inst* inst_cream = (mrs_inst*)inst_base->component;
+
+            if (inst_cream->R) {
+                RD = cpu->Spsr_copy;
+            } else {
+                SAVE_NZCVT; // presumably folds the cached flag fields back into Cpsr first - macro defined outside this view
+                RD = cpu->Cpsr;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mrs_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    MSR_INST: // Status register write: Cpsr when R is clear, Spsr_copy when R is set.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            msr_inst* inst_cream = (msr_inst*)inst_base->component;
+            const u32 UserMask = 0xf80f0200, PrivMask = 0x000001df, StateMask = 0x01000020; // writable-bit groups combined below
+            unsigned int inst = inst_cream->inst;
+            unsigned int operand;
+
+            if (BIT(inst, 25)) { // bit 25 set: operand is an 8-bit immediate rotated right by 2 * bits 8-11
+                int rot_imm = BITS(inst, 8, 11) * 2;
+                operand = ROTATE_RIGHT_32(BITS(inst, 0, 7), rot_imm);
+            } else {
+                operand = cpu->Reg[BITS(inst, 0, 3)];
+            }
+            u32 byte_mask = (BIT(inst, 16) ? 0xff : 0) | (BIT(inst, 17) ? 0xff00 : 0) // instruction bits 16-19 each enable one byte
+                        | (BIT(inst, 18) ? 0xff0000 : 0) | (BIT(inst, 19) ? 0xff000000 : 0);
+            u32 mask = 0;
+            if (!inst_cream->R) {
+                if (cpu->InAPrivilegedMode()) {
+                    if ((operand & StateMask) != 0) {
+                        /// UNPREDICTABLE
+                        DEBUG_MSG; // mask stays 0, so the Cpsr assignment below changes no bits
+                    } else
+                        mask = byte_mask & (UserMask | PrivMask);
+                } else {
+                    mask = byte_mask & UserMask;
+                }
+                SAVE_NZCVT; // presumably folds the cached flag fields into Cpsr before it is modified - macro defined outside this view
+
+                cpu->Cpsr = (cpu->Cpsr & ~mask) | (operand & mask);
+                cpu->ChangePrivilegeMode(cpu->Cpsr & 0x1F);
+                LOAD_NZCVT;
+            } else {
+                if (CurrentModeHasSPSR) { // without an SPSR in the current mode the write is dropped
+                    mask = byte_mask & (UserMask | PrivMask | StateMask);
+                    cpu->Spsr_copy = (cpu->Spsr_copy & ~mask) | (operand & mask);
+                }
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(msr_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    MUL_INST: // Multiply: Rd = low 32 bits of (Rm * Rs).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mul_inst* inst_cream = (mul_inst*)inst_base->component;
+
+            u64 rm = RM; // operands are widened to 64 bits before the multiply
+            u64 rs = RS;
+            RD = static_cast<u32>((rm * rs) & 0xffffffff);
+            if (inst_cream->S) { // S form updates only N and Z here
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mul_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    MVN_INST: // Rd = bitwise NOT of the shifter operand, with optional flag update (S).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            mvn_inst* const inst_cream = (mvn_inst*)inst_base->component;
+
+            RD = ~SHIFTER_OPERAND;
+
+            if (inst_cream->S && (inst_cream->Rd == 15)) { // S with Rd == r15: restore Cpsr from Spsr_copy instead of setting flags
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT; // presumably reloads the cached flag fields from Cpsr - macro defined outside this view
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                UPDATE_CFLAG_WITH_SC; // presumably takes C from the shifter carry-out - macro defined outside this view
+            }
+            if (inst_cream->Rd == 15) { // r15 was written: go through DISPATCH
+                INC_PC(sizeof(mvn_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(mvn_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    ORR_INST: // Rd = Rn | shifter operand, with optional flag update (S).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            orr_inst* const inst_cream = (orr_inst*)inst_base->component;
+
+            u32 lop = RN;
+            u32 rop = SHIFTER_OPERAND;
+
+            if (inst_cream->Rn == 15) // Rn is r15: add two instruction sizes to the stored value
+                lop += 2 * cpu->GetInstructionSize();
+
+            RD = lop | rop;
+
+            if (inst_cream->S && (inst_cream->Rd == 15)) { // S with Rd == r15: restore Cpsr from Spsr_copy instead of setting flags
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT; // presumably reloads the cached flag fields from Cpsr - macro defined outside this view
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                UPDATE_CFLAG_WITH_SC; // presumably takes C from the shifter carry-out - macro defined outside this view
+            }
+            if (inst_cream->Rd == 15) { // r15 was written: go through DISPATCH
+                INC_PC(sizeof(orr_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(orr_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    NOP_INST: // No operation: only advances to the next instruction.
+    {
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC_STUB; // NOTE(review): differs from the INC_PC(sizeof(...)) used by other handlers - macro defined outside this view
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    PKHBT_INST: // Pack halfwords: low half of Rd from Rn, high half from (Rm << imm).
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            pkh_inst *inst_cream = (pkh_inst *)inst_base->component;
+            RD = (RN & 0xFFFF) | ((RM << inst_cream->imm) & 0xFFFF0000);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(pkh_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    PKHTB_INST: // Pack halfwords: low half of Rd from (Rm >> shift, signed), high half from Rn.
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            pkh_inst *inst_cream = (pkh_inst *)inst_base->component;
+            int shift_imm = inst_cream->imm ? inst_cream->imm : 31; // imm == 0 is treated as a shift by 31
+            RD = ((static_cast<s32>(RM) >> shift_imm) & 0xFFFF) | (RN & 0xFFFF0000);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(pkh_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    PLD_INST: // Preload hint: performs no memory access here.
+    {
+        // Not implemented. PLD is a hint instruction, so it's optional.
+
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(pld_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    QADD_INST: // Shared handler for the four saturating add/subtract forms, selected by inst_cream->op1.
+    QDADD_INST:
+    QDSUB_INST:
+    QSUB_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+            const u8 op1 = inst_cream->op1;
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+
+            u32 result = 0; // stays 0 if op1 matches none of the cases below
+
+            // QADD
+            if (op1 == 0x00) {
+                result = rm_val + rn_val;
+
+                if (AddOverflow(rm_val, rn_val, result)) {
+                    result = POS(result) ? 0x80000000 : 0x7FFFFFFF; // saturate to the bound opposite the wrapped result; POS presumably tests for a clear sign bit
+                    cpu->Cpsr |= (1 << 27); // set Cpsr bit 27 on saturation; it is never cleared in this handler
+                }
+            }
+            // QSUB
+            else if (op1 == 0x01) {
+                result = rm_val - rn_val;
+
+                if (SubOverflow(rm_val, rn_val, result)) {
+                    result = POS(result) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+            }
+            // QDADD
+            else if (op1 == 0x02) {
+                u32 mul = (rn_val * 2); // the doubling of Rn is saturated on its own before the add
+
+                if (AddOverflow(rn_val, rn_val, rn_val * 2)) {
+                    mul = POS(mul) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+
+                result = mul + rm_val;
+
+                if (AddOverflow(rm_val, mul, result)) {
+                    result = POS(result) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+            }
+            // QDSUB
+            else if (op1 == 0x03) {
+                u32 mul = (rn_val * 2); // the doubling of Rn is saturated on its own before the subtract
+
+                if (AddOverflow(rn_val, rn_val, mul)) {
+                    mul = POS(mul) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+
+                result = rm_val - mul;
+
+                if (SubOverflow(rm_val, mul, result)) {
+                    result = POS(result) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+            }
+
+            RD = result;
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize(); // advance the guest PC past this instruction
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    QADD8_INST:
+    QADD16_INST:
+    QADDSUBX_INST:
+    QSUB8_INST:
+    QSUB16_INST:
+    QSUBADDX_INST:
+    { // Six labels share this handler; inst_cream->op2 picks the variant. Rd is only written when the condition passes.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+            const u16 rm_lo = (RM & 0xFFFF);
+            const u16 rm_hi = ((RM >> 16) & 0xFFFF);
+            const u16 rn_lo = (RN & 0xFFFF);
+            const u16 rn_hi = ((RN >> 16) & 0xFFFF);
+            const u8 op2    = inst_cream->op2;
+            // Per-halfword results; both remain 0 when op2 matches none of the cases below.
+            u16 lo_result = 0;
+            u16 hi_result = 0;
+            // NOTE(review): the ARMul_SignedSaturated* helpers are defined elsewhere; presumably they clamp to the lane's signed range - confirm.
+            // QADD16
+            if (op2 == 0x00) {
+                lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_lo);
+                hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_hi);
+            }
+            // QASX
+            else if (op2 == 0x01) {
+                lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_hi);
+                hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_lo);
+            }
+            // QSAX
+            else if (op2 == 0x02) {
+                lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_hi);
+                hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_lo);
+            }
+            // QSUB16
+            else if (op2 == 0x03) {
+                lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_lo);
+                hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_hi);
+            }
+            // QADD8
+            else if (op2 == 0x04) {
+                lo_result = ARMul_SignedSaturatedAdd8(rn_lo & 0xFF, rm_lo & 0xFF) |
+                            ARMul_SignedSaturatedAdd8(rn_lo >> 8, rm_lo >> 8) << 8;
+                hi_result = ARMul_SignedSaturatedAdd8(rn_hi & 0xFF, rm_hi & 0xFF) |
+                            ARMul_SignedSaturatedAdd8(rn_hi >> 8, rm_hi >> 8) << 8;
+            }
+            // QSUB8
+            else if (op2 == 0x07) {
+                lo_result = ARMul_SignedSaturatedSub8(rn_lo & 0xFF, rm_lo & 0xFF) |
+                            ARMul_SignedSaturatedSub8(rn_lo >> 8, rm_lo >> 8) << 8;
+                hi_result = ARMul_SignedSaturatedSub8(rn_hi & 0xFF, rm_hi & 0xFF) |
+                            ARMul_SignedSaturatedSub8(rn_hi >> 8, rm_hi >> 8) << 8;
+            }
+            // Pack the halves: lo_result into bits 0-15, hi_result into bits 16-31.
+            RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
+        }
+        // Runs whether or not the condition passed: step past this instruction.
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    REV_INST:
+    REV16_INST:
+    REVSH_INST:
+    {
+        // Byte-reversal family; the op1/op2 fields tell REV, REV16 and REVSH apart.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            rev_inst* const inst_cream = (rev_inst*)inst_base->component;
+            const u8 op1 = inst_cream->op1;
+            const u8 op2 = inst_cream->op2;
+            const u32 src = RM;
+            const u32 byte0 = src & 0xFF;
+            const u32 byte1 = (src >> 8) & 0xFF;
+            const u32 byte2 = (src >> 16) & 0xFF;
+            const u32 byte3 = (src >> 24) & 0xFF;
+            if (op1 == 0x03 && op2 == 0x01) {
+                // REV: reverse all four bytes of the word.
+                RD = (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
+            } else if (op1 == 0x03 && op2 == 0x05) {
+                // REV16: swap the two bytes inside each halfword.
+                RD = (byte0 << 8) | byte1 | (byte2 << 24) | (byte3 << 16);
+            } else if (op1 == 0x07 && op2 == 0x05) {
+                // REVSH: swap the low two bytes, then sign-extend from bit 15.
+                const u32 swapped = (byte0 << 8) | byte1;
+                RD = (swapped & 0x8000) ? (swapped | 0xffff0000) : swapped;
+            }
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(rev_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    RFE_INST:
+    {
+        // RFE is unconditional. It undoes SRS: the return address is at `address`, the saved status word at `address + 4`.
+        ldst_inst* const inst_cream = (ldst_inst*)inst_base->component;
+        u32 address = 0;
+        inst_cream->get_addr(cpu, inst_cream->inst, address);
+
+        cpu->Reg[15] = cpu->ReadMemory32(address);
+        cpu->Cpsr    = cpu->ReadMemory32(address + 4);
+        cpu->ChangePrivilegeMode(cpu->Cpsr & 0x1F); // same mode switch the S-bit/Rd==15 handlers do after replacing CPSR
+        LOAD_NZCVT;                                 // and the same reload of the cached flags from the new CPSR
+        INC_PC(sizeof(ldst_inst));
+        goto DISPATCH;
+    }
+
+    RSB_INST:
+    { // RSB: Rd = shifter_operand - Rn, evaluated as AddWithCarry(~Rn, shifter_operand, 1).
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            rsb_inst* const inst_cream = (rsb_inst*)inst_base->component;
+            // When Rn is r15 the operand is Reg[15] plus two instruction sizes.
+            u32 rn_val = RN;
+            if (inst_cream->Rn == 15)
+                rn_val += 2 * cpu->GetInstructionSize();
+            // Filled in by AddWithCarry; only consumed on the flag-setting path below.
+            bool carry;
+            bool overflow;
+            RD = AddWithCarry(~rn_val, SHIFTER_OPERAND, 1, &carry, &overflow);
+            // S with Rd == r15 copies Spsr_copy into CPSR (only if the mode has an SPSR); S alone updates N, Z, C and V.
+            if (inst_cream->S && (inst_cream->Rd == 15)) {
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT;
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                cpu->CFlag = carry;
+                cpu->VFlag = overflow;
+            }
+            if (inst_cream->Rd == 15) { // Rd is the PC: jump to DISPATCH without advancing Reg[15]
+                INC_PC(sizeof(rsb_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(rsb_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    RSC_INST:
+    { // RSC: reverse subtract with carry, evaluated as AddWithCarry(~Rn, shifter_operand, CFlag).
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            rsc_inst* const inst_cream = (rsc_inst*)inst_base->component;
+            // When Rn is r15 the operand is Reg[15] plus two instruction sizes.
+            u32 rn_val = RN;
+            if (inst_cream->Rn == 15)
+                rn_val += 2 * cpu->GetInstructionSize();
+            // Filled in by AddWithCarry; only consumed on the flag-setting path below.
+            bool carry;
+            bool overflow;
+            RD = AddWithCarry(~rn_val, SHIFTER_OPERAND, cpu->CFlag, &carry, &overflow);
+            // S with Rd == r15 copies Spsr_copy into CPSR (only if the mode has an SPSR); S alone updates N, Z, C and V.
+            if (inst_cream->S && (inst_cream->Rd == 15)) {
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT;
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                cpu->CFlag = carry;
+                cpu->VFlag = overflow;
+            }
+            if (inst_cream->Rd == 15) { // Rd is the PC: jump to DISPATCH without advancing Reg[15]
+                INC_PC(sizeof(rsc_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(rsc_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SADD8_INST:
+    SSUB8_INST:
+    SADD16_INST:
+    SADDSUBX_INST:
+    SSUBADDX_INST:
+    SSUB16_INST:
+    { // Parallel signed add/subtract: op2 0x00-0x03 work on halfwords, 0x04/0x07 on bytes; CPSR bits 16-19 record result signs.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+            const u8 op2 = inst_cream->op2;
+            // Halfword variants: operands are signed 16-bit, results are kept in 32 bits so their true sign is visible.
+            if (op2 == 0x00 || op2 == 0x01 || op2 == 0x02 || op2 == 0x03) {
+                const s16 rn_lo = (RN & 0xFFFF);
+                const s16 rn_hi = ((RN >> 16) & 0xFFFF);
+                const s16 rm_lo = (RM & 0xFFFF);
+                const s16 rm_hi = ((RM >> 16) & 0xFFFF);
+
+                s32 lo_result = 0;
+                s32 hi_result = 0;
+
+                // SADD16
+                if (inst_cream->op2 == 0x00) {
+                    lo_result = (rn_lo + rm_lo);
+                    hi_result = (rn_hi + rm_hi);
+                }
+                // SASX
+                else if (op2 == 0x01) {
+                    lo_result = (rn_lo - rm_hi);
+                    hi_result = (rn_hi + rm_lo);
+                }
+                // SSAX
+                else if (op2 == 0x02) {
+                    lo_result = (rn_lo + rm_hi);
+                    hi_result = (rn_hi - rm_lo);
+                }
+                // SSUB16
+                else if (op2 == 0x03) {
+                    lo_result = (rn_lo - rm_lo);
+                    hi_result = (rn_hi - rm_hi);
+                }
+
+                RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
+                // Bits 16-17 are set together when the low result is non-negative, bits 18-19 when the high result is.
+                if (lo_result >= 0) {
+                    cpu->Cpsr |= (1 << 16);
+                    cpu->Cpsr |= (1 << 17);
+                } else {
+                    cpu->Cpsr &= ~(1 << 16);
+                    cpu->Cpsr &= ~(1 << 17);
+                }
+
+                if (hi_result >= 0) {
+                    cpu->Cpsr |= (1 << 18);
+                    cpu->Cpsr |= (1 << 19);
+                } else {
+                    cpu->Cpsr &= ~(1 << 18);
+                    cpu->Cpsr &= ~(1 << 19);
+                }
+            }
+            else if (op2 == 0x04 || op2 == 0x07) {
+                s32 lo_val1, lo_val2;
+                s32 hi_val1, hi_val2;
+                // Byte variants: each lane is sign-extended from 8 bits before the 32-bit add or subtract.
+                // SADD8
+                if (op2 == 0x04) {
+                    lo_val1 = (s32)(s8)(RN & 0xFF) + (s32)(s8)(RM & 0xFF);
+                    lo_val2 = (s32)(s8)((RN >> 8) & 0xFF)  + (s32)(s8)((RM >> 8) & 0xFF);
+                    hi_val1 = (s32)(s8)((RN >> 16) & 0xFF) + (s32)(s8)((RM >> 16) & 0xFF);
+                    hi_val2 = (s32)(s8)((RN >> 24) & 0xFF) + (s32)(s8)((RM >> 24) & 0xFF);
+                }
+                // SSUB8
+                else {
+                    lo_val1 = (s32)(s8)(RN & 0xFF) - (s32)(s8)(RM & 0xFF);
+                    lo_val2 = (s32)(s8)((RN >> 8) & 0xFF) - (s32)(s8)((RM >> 8) & 0xFF);
+                    hi_val1 = (s32)(s8)((RN >> 16) & 0xFF) - (s32)(s8)((RM >> 16) & 0xFF);
+                    hi_val2 = (s32)(s8)((RN >> 24) & 0xFF) - (s32)(s8)((RM >> 24) & 0xFF);
+                }
+
+                RD =  ((lo_val1 & 0xFF) | ((lo_val2 & 0xFF) << 8) | ((hi_val1 & 0xFF) << 16) | ((hi_val2 & 0xFF) << 24));
+                // One CPSR bit per byte lane (16-19): set when that lane's untruncated result is non-negative.
+                if (lo_val1 >= 0)
+                    cpu->Cpsr |= (1 << 16);
+                else
+                    cpu->Cpsr &= ~(1 << 16);
+
+                if (lo_val2 >= 0)
+                    cpu->Cpsr |= (1 << 17);
+                else
+                    cpu->Cpsr &= ~(1 << 17);
+
+                if (hi_val1 >= 0)
+                    cpu->Cpsr |= (1 << 18);
+                else
+                    cpu->Cpsr &= ~(1 << 18);
+
+                if (hi_val2 >= 0)
+                    cpu->Cpsr |= (1 << 19);
+                else
+                    cpu->Cpsr &= ~(1 << 19);
+            }
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SBC_INST:
+    { // SBC: subtract with carry, evaluated as AddWithCarry(Rn, ~shifter_operand, CFlag).
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            sbc_inst* const inst_cream = (sbc_inst*)inst_base->component;
+            // When Rn is r15 the operand is Reg[15] plus two instruction sizes.
+            u32 rn_val = RN;
+            if (inst_cream->Rn == 15)
+                rn_val += 2 * cpu->GetInstructionSize();
+            // Filled in by AddWithCarry; only consumed on the flag-setting path below.
+            bool carry;
+            bool overflow;
+            RD = AddWithCarry(rn_val, ~SHIFTER_OPERAND, cpu->CFlag, &carry, &overflow);
+            // S with Rd == r15 copies Spsr_copy into CPSR (only if the mode has an SPSR); S alone updates N, Z, C and V.
+            if (inst_cream->S && (inst_cream->Rd == 15)) {
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT;
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                cpu->CFlag = carry;
+                cpu->VFlag = overflow;
+            }
+            if (inst_cream->Rd == 15) { // Rd is the PC: jump to DISPATCH without advancing Reg[15]
+                INC_PC(sizeof(sbc_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(sbc_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SEL_INST:
+    {
+        // SEL: assemble Rd one byte lane at a time.
+        // Each of CPSR bits 16-19 steers one lane: when the bit is set the
+        // byte is taken from Rn, otherwise it is taken from Rm.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+
+            // Snapshot every input before Rd is written.
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+            const u32 status = cpu->Cpsr;
+
+            // Bits 0-7, steered by CPSR bit 16.
+            const u32 lane0 = (status & (1 << 16)) ? (rn_val & 0xff)
+                                                   : (rm_val & 0xff);
+
+            // Bits 8-15, steered by CPSR bit 17.
+            const u32 lane1 = (status & (1 << 17)) ? (rn_val & 0x0000ff00)
+                                                   : (rm_val & 0x0000ff00);
+
+            // Bits 16-23, steered by CPSR bit 18.
+            const u32 lane2 = (status & (1 << 18)) ? (rn_val & 0x00ff0000)
+                                                   : (rm_val & 0x00ff0000);
+
+            // Bits 24-31, steered by CPSR bit 19.
+            const u32 lane3 = (status & (1 << 19)) ? (rn_val & 0xff000000)
+                                                   : (rm_val & 0xff000000);
+
+            // The lanes occupy disjoint bits, so OR-ing them yields the full word.
+            RD = lane0 | lane1 | lane2 | lane3;
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SETEND_INST:
+    {
+        // SETEND is unconditional: CPSR bit 9 is set for big-endian and cleared for little-endian.
+        setend_inst* const inst_cream = (setend_inst*)inst_base->component;
+        const char* endian_name;
+        if (inst_cream->set_bigend == 1) {
+            cpu->Cpsr |= (1 << 9);
+            endian_name = "BE";
+        } else {
+            cpu->Cpsr &= ~(1 << 9);
+            endian_name = "LE";
+        }
+        LOG_WARNING(Core_ARM11, "SETEND %s executed", endian_name);
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(setend_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SEV_INST:
+    {
+        // SEV is only a hint, so this stub just traces it when the condition holds.
+        const bool cond_ok = (inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond);
+        if (cond_ok) {
+            LOG_TRACE(Core_ARM11, "SEV executed.");
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC_STUB;
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SHADD8_INST:
+    SHADD16_INST:
+    SHADDSUBX_INST:
+    SHSUB8_INST:
+    SHSUB16_INST:
+    SHSUBADDX_INST:
+    { // Parallel signed halving add/subtract: each lane's sum or difference is shifted right by one, then packed into Rd.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+            // op2 0x00-0x03 select the halfword variants, 0x04 and 0x07 the byte variants; other values leave Rd untouched.
+            const u8 op2 = inst_cream->op2;
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+
+            if (op2 == 0x00 || op2 == 0x01 || op2 == 0x02 || op2 == 0x03) {
+                s32 lo_result = 0;
+                s32 hi_result = 0;
+
+                // SHADD16
+                if (op2 == 0x00) {
+                    lo_result = ((s16)(rn_val & 0xFFFF) + (s16)(rm_val & 0xFFFF)) >> 1;
+                    hi_result = ((s16)((rn_val >> 16) & 0xFFFF) + (s16)((rm_val >> 16) & 0xFFFF)) >> 1;
+                }
+                // SHASX
+                else if (op2 == 0x01) {
+                    lo_result = ((s16)(rn_val & 0xFFFF) - (s16)((rm_val >> 16) & 0xFFFF)) >> 1;
+                    hi_result = ((s16)((rn_val >> 16) & 0xFFFF) + (s16)(rm_val & 0xFFFF)) >> 1;
+                }
+                // SHSAX
+                else if (op2 == 0x02) {
+                    lo_result = ((s16)(rn_val & 0xFFFF) + (s16)((rm_val >> 16) & 0xFFFF)) >> 1;
+                    hi_result = ((s16)((rn_val >> 16) & 0xFFFF) - (s16)(rm_val & 0xFFFF)) >> 1;
+                }
+                // SHSUB16
+                else if (op2 == 0x03) {
+                    lo_result = ((s16)(rn_val & 0xFFFF) - (s16)(rm_val & 0xFFFF)) >> 1;
+                    hi_result = ((s16)((rn_val >> 16) & 0xFFFF) - (s16)((rm_val >> 16) & 0xFFFF)) >> 1;
+                }
+                // Pack the halved halfwords: low lane in bits 0-15, high lane in bits 16-31.
+                RD = ((lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16));
+            }
+            else if (op2 == 0x04 || op2 == 0x07) {
+                s16 lo_val1, lo_val2;
+                s16 hi_val1, hi_val2;
+
+                // SHADD8
+                if (op2 == 0x04) {
+                    lo_val1 = ((s8)(rn_val & 0xFF) + (s8)(rm_val & 0xFF)) >> 1;
+                    lo_val2 = ((s8)((rn_val >> 8) & 0xFF) + (s8)((rm_val >> 8) & 0xFF)) >> 1;
+
+                    hi_val1 = ((s8)((rn_val >> 16) & 0xFF) + (s8)((rm_val >> 16) & 0xFF)) >> 1;
+                    hi_val2 = ((s8)((rn_val >> 24) & 0xFF) + (s8)((rm_val >> 24) & 0xFF)) >> 1;
+                }
+                // SHSUB8
+                else {
+                    lo_val1 = ((s8)(rn_val & 0xFF) - (s8)(rm_val & 0xFF)) >> 1;
+                    lo_val2 = ((s8)((rn_val >> 8) & 0xFF) - (s8)((rm_val >> 8) & 0xFF)) >> 1;
+
+                    hi_val1 = ((s8)((rn_val >> 16) & 0xFF) - (s8)((rm_val >> 16) & 0xFF)) >> 1;
+                    hi_val2 = ((s8)((rn_val >> 24) & 0xFF) - (s8)((rm_val >> 24) & 0xFF)) >> 1;
+                }
+                // Pack the four halved bytes, lowest lane first.
+                RD = (lo_val1 & 0xFF) | ((lo_val2 & 0xFF) << 8) | ((hi_val1 & 0xFF) << 16) | ((hi_val2 & 0xFF) << 24);
+            }
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMLA_INST:
+    { // SMLA<x><y>: Rd = (signed halfword of Rm) * (signed halfword of Rs) + Rn; CPSR bit 27 is set if the add overflows.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            smla_inst* inst_cream = (smla_inst*)inst_base->component;
+            s32 operand1, operand2;
+            if (inst_cream->x == 0)
+                operand1 = (BIT(RM, 15)) ? (BITS(RM, 0, 15) | 0xffff0000) : BITS(RM, 0, 15);
+            else
+                operand1 = (BIT(RM, 31)) ? (BITS(RM, 16, 31) | 0xffff0000) : BITS(RM, 16, 31);
+
+            if (inst_cream->y == 0)
+                operand2 = (BIT(RS, 15)) ? (BITS(RS, 0, 15) | 0xffff0000) : BITS(RS, 0, 15);
+            else
+                operand2 = (BIT(RS, 31)) ? (BITS(RS, 16, 31) | 0xffff0000) : BITS(RS, 16, 31);
+            const u32 accumulator = RN; // read before Rd is written: Rd may be the same register as Rn
+            RD = operand1 * operand2 + accumulator;
+            if (AddOverflow(operand1 * operand2, accumulator, RD))
+                cpu->Cpsr |= (1 << 27);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(smla_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMLAD_INST:
+    SMLSD_INST:
+    SMUAD_INST:
+    SMUSD_INST:
+    { // Dual signed 16x16 multiply; the products are added or subtracted, with an optional accumulate when Ra != 15.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+            const u8 op2 = inst_cream->op2;
+
+            u32 rm_val = cpu->Reg[inst_cream->Rm];
+            const u32 rn_val = cpu->Reg[inst_cream->Rn];
+            // Snapshot the accumulator now: Rd may be the same register as Ra, so Ra must not be re-read once Rd is written.
+            const bool accumulate = (inst_cream->Ra != 15);
+            const u32 ra_val = accumulate ? cpu->Reg[inst_cream->Ra] : 0;
+
+            if (inst_cream->m) // m set: swap Rm's halfwords before multiplying
+                rm_val = (((rm_val & 0xFFFF) << 16) | (rm_val >> 16));
+
+            const s16 rm_lo = (rm_val & 0xFFFF);
+            const s16 rm_hi = ((rm_val >> 16) & 0xFFFF);
+            const s16 rn_lo = (rn_val & 0xFFFF);
+            const s16 rn_hi = ((rn_val >> 16) & 0xFFFF);
+
+            const u32 product1 = (rn_lo * rm_lo);
+            const u32 product2 = (rn_hi * rm_hi);
+
+            // SMUAD and SMLAD
+            if (BIT(op2, 1) == 0) {
+                u32 result = (product1 + product2);
+                if (accumulate) {
+                    result += ra_val;
+                    if (ARMul_AddOverflowQ(product1 + product2, ra_val))
+                        cpu->Cpsr |= (1 << 27);
+                }
+                if (ARMul_AddOverflowQ(product1, product2))
+                    cpu->Cpsr |= (1 << 27);
+                RD = result;
+            }
+            // SMUSD and SMLSD
+            else {
+                u32 result = (product1 - product2);
+                if (accumulate) {
+                    result += ra_val;
+                    if (ARMul_AddOverflowQ(product1 - product2, ra_val))
+                        cpu->Cpsr |= (1 << 27);
+                }
+                RD = result;
+            }
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(smlad_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMLAL_INST:
+    { // SMLAL: RdHi:RdLo += Rm * Rs, with both factors treated as signed 32-bit values.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            umlal_inst* inst_cream = (umlal_inst*)inst_base->component; // decoded through the umlal_inst layout
+            long long int rm = RM;
+            long long int rs = RS;
+            if (BIT(rm, 31)) { // manual sign-extension of the 32-bit register value to 64 bits
+                rm |= 0xffffffff00000000LL;
+            }
+            if (BIT(rs, 31)) {
+                rs |= 0xffffffff00000000LL;
+            }
+            long long int rst = rm * rs;
+            long long int rdhi32 = RDHI;
+            long long int hilo = (rdhi32 << 32) + RDLO; // current 64-bit accumulator
+            rst += hilo;
+            RDLO = BITS(rst,  0, 31);
+            RDHI = BITS(rst, 32, 63);
+            if (inst_cream->S) { // S updates only N and Z, from the 64-bit result
+                cpu->NFlag = BIT(RDHI, 31);
+                cpu->ZFlag = (RDHI == 0 && RDLO == 0);
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(umlal_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMLALXY_INST:
+    { // SMLAL<x><y>: RdHi:RdLo += (selected halfword of Rn) * (selected halfword of Rm), both treated as signed.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            smlalxy_inst* const inst_cream = (smlalxy_inst*)inst_base->component;
+            // x and y choose the top halfword (non-zero) or the bottom halfword (zero) of each operand.
+            u64 operand1 = RN;
+            u64 operand2 = RM;
+            // Sign-extension is done in unsigned 64-bit arithmetic: subtracting 65536 wraps to the two's-complement value.
+            if (inst_cream->x != 0)
+                operand1 >>= 16;
+            if (inst_cream->y != 0)
+                operand2 >>= 16;
+            operand1 &= 0xFFFF;
+            if (operand1 & 0x8000)
+                operand1 -= 65536;
+            operand2 &= 0xFFFF;
+            if (operand2 & 0x8000)
+                operand2 -= 65536;
+            // Accumulate modulo 2^64, then split the sum back into the two destination registers.
+            u64 dest = ((u64)RDHI << 32 | RDLO) + (operand1 * operand2);
+            RDLO = (dest & 0xFFFFFFFF);
+            RDHI = ((dest >> 32) & 0xFFFFFFFF);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(smlalxy_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMLAW_INST:
+    { // SMLAW<y>: Rd = bits 16-47 of (Rn * signed halfword of Rm + (Ra << 16)).
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+            // All inputs, including Ra, are captured before Rd is written.
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+            const u32 ra_val = cpu->Reg[inst_cream->Ra];
+            const bool high = (inst_cream->m == 1);
+            // m == 1 selects Rm's top halfword, otherwise the bottom one; the s16 type makes it signed.
+            const s16 operand2 = (high) ? ((rm_val >> 16) & 0xFFFF) : (rm_val & 0xFFFF);
+            const s64 result = (s64)(s32)rn_val * (s64)(s32)operand2 + ((s64)(s32)ra_val << 16);
+
+            RD = BITS(result, 16, 47);
+            // CPSR bit 27 is set when the shifted 64-bit result differs from the 32-bit value stored in Rd.
+            if ((result >> 16) != (s32)RD)
+                cpu->Cpsr |= (1 << 27);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(smlad_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMLALD_INST:
+    SMLSLD_INST:
+    { // 64-bit accumulate of a dual signed 16x16 multiply: RdHi:RdLo += product1 +/- product2.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            smlald_inst* const inst_cream = (smlald_inst*)inst_base->component;
+
+            const bool do_swap = (inst_cream->swap == 1);
+            const u32 rdlo_val = RDLO;
+            const u32 rdhi_val = RDHI;
+            const u32 rn_val   = RN;
+            u32 rm_val         = RM;
+
+            if (do_swap) // exchange Rm's halfwords before multiplying
+                rm_val = (((rm_val & 0xFFFF) << 16) | (rm_val >> 16));
+            // Products are held as s64 so product1 + product2 cannot overflow 32 bits (0x8000 * 0x8000 twice sums to 2^31).
+            const s64 product1 = (s16)(rn_val & 0xFFFF) * (s16)(rm_val & 0xFFFF);
+            const s64 product2 = (s16)((rn_val >> 16) & 0xFFFF) * (s16)((rm_val >> 16) & 0xFFFF);
+            s64 result;
+
+            // SMLALD
+            if (BIT(inst_cream->op2, 1) == 0) {
+                result = (product1 + product2) + (s64)(rdlo_val | ((s64)rdhi_val << 32));
+            }
+            // SMLSLD
+            else {
+                result = (product1 - product2) + (s64)(rdlo_val | ((s64)rdhi_val << 32));
+            }
+
+            RDLO = (result & 0xFFFFFFFF);
+            RDHI = ((result >> 32) & 0xFFFFFFFF);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(smlald_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMMLA_INST:
+    SMMLS_INST:
+    SMMUL_INST:
+    { // Most-significant-word multiplies: Rd = top 32 bits of a 64-bit signed product, optionally combined with Ra << 32.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+            const bool do_round = (inst_cream->m == 1);
+
+            // Assume SMMUL by default.
+            s64 result = (s64)(s32)rn_val * (s64)(s32)rm_val;
+            // Ra == 15 marks the plain SMMUL form: nothing is accumulated.
+            if (inst_cream->Ra != 15) {
+                const u32 ra_val = cpu->Reg[inst_cream->Ra];
+
+                // SMMLA, otherwise SMMLS
+                if (BIT(inst_cream->op2, 1) == 0)
+                    result += ((s64)ra_val << 32);
+                else
+                    result = ((s64)ra_val << 32) - result;
+            }
+            // Rounding adds half of the discarded low word before the top 32 bits are taken.
+            if (do_round)
+                result += 0x80000000;
+
+            RD = ((result >> 32) & 0xFFFFFFFF);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(smlad_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMUL_INST:
+    { // SMUL<x><y>: Rd = product of one sign-extended halfword of Rm and one of Rs; no flags are touched.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            smul_inst* inst_cream = (smul_inst*)inst_base->component;
+            u32 operand1, operand2;
+            if (inst_cream->x == 0) // x == 0: bottom halfword of Rm, otherwise the top one
+                operand1 = (BIT(RM, 15)) ? (BITS(RM, 0, 15) | 0xffff0000) : BITS(RM, 0, 15);
+            else
+                operand1 = (BIT(RM, 31)) ? (BITS(RM, 16, 31) | 0xffff0000) : BITS(RM, 16, 31);
+            // y makes the same bottom/top choice for Rs.
+            if (inst_cream->y == 0)
+                operand2 = (BIT(RS, 15)) ? (BITS(RS, 0, 15) | 0xffff0000) : BITS(RS, 0, 15);
+            else
+                operand2 = (BIT(RS, 31)) ? (BITS(RS, 16, 31) | 0xffff0000) : BITS(RS, 16, 31);
+            RD = operand1 * operand2;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(smul_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    SMULL_INST:
+    { // SMULL: RdHi:RdLo = Rm * Rs as a signed 32x32 -> 64-bit multiply.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            umull_inst* inst_cream = (umull_inst*)inst_base->component; // decoded through the umull_inst layout
+            s64 rm = RM;
+            s64 rs = RS;
+            if (BIT(rm, 31)) { // manual sign-extension of the 32-bit register value to 64 bits
+                rm |= 0xffffffff00000000LL;
+            }
+            if (BIT(rs, 31)) {
+                rs |= 0xffffffff00000000LL;
+            }
+            s64 rst = rm * rs;
+            RDHI = BITS(rst, 32, 63);
+            RDLO = BITS(rst,  0, 31);
+            // S updates only N and Z, taken from the 64-bit result just written.
+            if (inst_cream->S) {
+                cpu->NFlag = BIT(RDHI, 31);
+                cpu->ZFlag = (RDHI == 0 && RDLO == 0);
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(umull_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMULW_INST:
+    {
+        // SMULW<y>: multiply Rn (signed 32-bit) by one signed halfword of Rm and keep bits 16-47 of the product.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+            const u32 half_shift = (inst_cream->m == 1) ? 16 : 0; // m == 1 selects the top halfword
+            const s16 rm_half = (s16)((RM >> half_shift) & 0xFFFF);
+            s64 wide_product = (s64)(s32)RN * (s64)rm_half;
+            RD = BITS(wide_product, 16, 47);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(smlad_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SRS_INST:
+    {
+        // SRS is unconditional
+        ldst_inst* const inst_cream = (ldst_inst*)inst_base->component;
+        // The target address comes from the decoded instruction's get_addr callback.
+        u32 address = 0;
+        inst_cream->get_addr(cpu, inst_cream->inst, address);
+        // Two consecutive words: Reg[14] at `address`, then Spsr_copy at `address + 4`.
+        cpu->WriteMemory32(address + 0, cpu->Reg[14]);
+        cpu->WriteMemory32(address + 4, cpu->Spsr_copy);
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SSAT_INST:
+    { // SSAT: optionally shift Rn, saturate it through ARMul_SignedSatQ, and set CPSR bit 27 if saturation was reported.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ssat_inst* const inst_cream = (ssat_inst*)inst_base->component;
+            // shift_type 0 is a left shift, 1 an arithmetic right shift; any other value leaves Rn unshifted.
+            u8 shift_type = inst_cream->shift_type;
+            u8 shift_amount = inst_cream->imm5;
+            u32 rn_val = RN;
+
+            // 32-bit ASR is encoded as an amount of 0.
+            if (shift_type == 1 && shift_amount == 0)
+                shift_amount = 31;
+
+            if (shift_type == 0)
+                rn_val <<= shift_amount;
+            else if (shift_type == 1)
+                rn_val = ((s32)rn_val >> shift_amount);
+            // NOTE(review): ARMul_SignedSatQ is defined elsewhere; presumably sat_imm selects the saturation width - confirm.
+            bool saturated = false;
+            rn_val = ARMul_SignedSatQ(rn_val, inst_cream->sat_imm, &saturated);
+
+            if (saturated)
+                cpu->Cpsr |= (1 << 27);
+
+            RD = rn_val;
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ssat_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SSAT16_INST:
+    { // SSAT16: saturate each signed halfword of Rn separately with the same sat_imm and pack the two results into Rd.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ssat_inst* const inst_cream = (ssat_inst*)inst_base->component;
+            const u8 saturate_to = inst_cream->sat_imm;
+            // One saturation report per halfword.
+            bool sat1 = false;
+            bool sat2 = false;
+            // Low halfword via the (s16) cast, high halfword via an arithmetic shift right by 16.
+            RD = (ARMul_SignedSatQ((s16)RN, saturate_to, &sat1) & 0xFFFF) |
+                 ARMul_SignedSatQ((s32)RN >> 16, saturate_to, &sat2) << 16;
+            // CPSR bit 27 is set if either halfword reported saturation.
+            if (sat1 || sat2)
+                cpu->Cpsr |= (1 << 27);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ssat_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    STC_INST:
+    {
+        // Instruction not implemented: it is stepped over as a no-op, without evaluating its condition.
+        //LOG_CRITICAL(Core_ARM11, "unimplemented instruction");
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(stc_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STM_INST:
+    { // STM: store the registers named by the instruction's low 16 bits to consecutive words starting at addr.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            unsigned int inst = inst_cream->inst;
+            // Capture the base register before get_addr runs (presumably it may write the base back - TODO confirm).
+            unsigned int Rn = BITS(inst, 16, 19);
+            unsigned int old_RN = cpu->Reg[Rn];
+            // Bit 22 set: r13/r14 come from the user bank (Reg_usr) unless the CPU is already in user mode.
+            inst_cream->get_addr(cpu, inst_cream->inst, addr);
+            if (BIT(inst_cream->inst, 22) == 1) {
+                for (int i = 0; i < 13; i++) {
+                    if (BIT(inst_cream->inst, i)) {
+                        cpu->WriteMemory32(addr, cpu->Reg[i]);
+                        addr += 4;
+                    }
+                }
+                if (BIT(inst_cream->inst, 13)) {
+                    if (cpu->Mode == USER32MODE)
+                        cpu->WriteMemory32(addr, cpu->Reg[13]);
+                    else
+                        cpu->WriteMemory32(addr, cpu->Reg_usr[0]);
+
+                    addr += 4;
+                }
+                if (BIT(inst_cream->inst, 14)) {
+                    if (cpu->Mode == USER32MODE)
+                        cpu->WriteMemory32(addr, cpu->Reg[14]);
+                    else
+                        cpu->WriteMemory32(addr, cpu->Reg_usr[1]);
+
+                    addr += 4;
+                }
+                if (BIT(inst_cream->inst, 15)) { // r15 is stored as the PC, never as the user-bank link register
+                    cpu->WriteMemory32(addr, cpu->Reg[15] + 2 * cpu->GetInstructionSize());
+                }
+            } else {
+                for (size_t i = 0; i < 15; i++) {
+                    if (BIT(inst_cream->inst, i)) {
+                        if (i == Rn)
+                            cpu->WriteMemory32(addr, old_RN);
+                        else
+                            cpu->WriteMemory32(addr, cpu->Reg[i]);
+
+                        addr += 4;
+                    }
+                }
+                // The PC is stored as Reg[15] plus two instruction sizes, the same value STR stores for r15.
+                // Check PC reg
+                if (BIT(inst_cream->inst, 15))
+                    cpu->WriteMemory32(addr, cpu->Reg[15] + 2 * cpu->GetInstructionSize());
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    SXTB_INST:
+    {
+        // SXTB: rotate Rm right by 8 * rotate bits, then sign-extend the low byte into Rd.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            sxtb_inst* inst_cream = (sxtb_inst*)inst_base->component;
+
+            const unsigned int rotated = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate);
+            const unsigned int low_byte = rotated & 0xff;
+
+            // Bit 7 is the byte's sign; replicate it through bits 8-31.
+            RD = (low_byte & 0x80) ? (low_byte | 0xffffff00) : low_byte;
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(sxtb_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STR_INST:
+    {
+        // STR: write one 32-bit register to the address produced by get_addr.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr);
+
+            const unsigned int src_reg = BITS(inst_cream->inst, 12, 15);
+            // r15 as the source is stored two instruction sizes ahead of its current value.
+            const unsigned int pc_bias = (src_reg == 15) ? 2 * cpu->GetInstructionSize() : 0;
+
+            cpu->WriteMemory32(addr, cpu->Reg[src_reg] + pc_bias);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    UXTB_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            uxtb_inst* inst_cream = (uxtb_inst*)inst_base->component;
+            RD = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xff;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(uxtb_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    UXTAB_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            uxtab_inst* inst_cream = (uxtab_inst*)inst_base->component;
+
+            unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xff;
+            RD = RN + operand2;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(uxtab_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STRB_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr);
+            unsigned int value = cpu->Reg[BITS(inst_cream->inst, 12, 15)] & 0xff;
+            cpu->WriteMemory8(addr, value);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STRBT_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr);
+
+            const u32 previous_mode = cpu->Mode;
+            const u32 value = cpu->Reg[BITS(inst_cream->inst, 12, 15)] & 0xff;
+
+            cpu->ChangePrivilegeMode(USER32MODE);
+            cpu->WriteMemory8(addr, value);
+            cpu->ChangePrivilegeMode(previous_mode);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STRD_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr);
+
+            // The 3DS doesn't have the Large Physical Access Extension (LPAE)
+            // so STRD wouldn't store these as a single write.
+            cpu->WriteMemory32(addr + 0, cpu->Reg[BITS(inst_cream->inst, 12, 15)]);
+            cpu->WriteMemory32(addr + 4, cpu->Reg[BITS(inst_cream->inst, 12, 15) + 1]);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STREX_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+            unsigned int write_addr = cpu->Reg[inst_cream->Rn];
+
+            if (cpu->IsExclusiveMemoryAccess(write_addr)) {
+                cpu->UnsetExclusiveMemoryAddress();
+                cpu->WriteMemory32(write_addr, RM);
+                RD = 0;
+            } else {
+                // Failed to write due to mutex access
+                RD = 1;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STREXB_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+            unsigned int write_addr = cpu->Reg[inst_cream->Rn];
+
+            if (cpu->IsExclusiveMemoryAccess(write_addr)) {
+                cpu->UnsetExclusiveMemoryAddress();
+                cpu->WriteMemory8(write_addr, cpu->Reg[inst_cream->Rm]);
+                RD = 0;
+            } else {
+                // Failed to write due to mutex access
+                RD = 1;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STREXD_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+            unsigned int write_addr = cpu->Reg[inst_cream->Rn];
+
+            if (cpu->IsExclusiveMemoryAccess(write_addr)) {
+                cpu->UnsetExclusiveMemoryAddress();
+
+                const u32 rt  = cpu->Reg[inst_cream->Rm + 0];
+                const u32 rt2 = cpu->Reg[inst_cream->Rm + 1];
+                u64 value;
+
+                if (cpu->InBigEndianMode())
+                    value = (((u64)rt << 32) | rt2);
+                else
+                    value = (((u64)rt2 << 32) | rt);
+
+                cpu->WriteMemory64(write_addr, value);
+                RD = 0;
+            }
+            else {
+                // Failed to write due to mutex access
+                RD = 1;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STREXH_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+            unsigned int write_addr = cpu->Reg[inst_cream->Rn];
+
+            if (cpu->IsExclusiveMemoryAccess(write_addr)) {
+                cpu->UnsetExclusiveMemoryAddress();
+                cpu->WriteMemory16(write_addr, RM);
+                RD = 0;
+            } else {
+                // Failed to write due to mutex access
+                RD = 1;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STRH_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr);
+
+            unsigned int value = cpu->Reg[BITS(inst_cream->inst, 12, 15)] & 0xffff;
+            cpu->WriteMemory16(addr, value);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STRT_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
+            inst_cream->get_addr(cpu, inst_cream->inst, addr);
+
+            const u32 previous_mode = cpu->Mode;
+            const u32 rt_index = BITS(inst_cream->inst, 12, 15);
+
+            u32 value = cpu->Reg[rt_index];
+            if (rt_index == 15)
+                value += 2 * cpu->GetInstructionSize();
+
+            cpu->ChangePrivilegeMode(USER32MODE);
+            cpu->WriteMemory32(addr, value);
+            cpu->ChangePrivilegeMode(previous_mode);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ldst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    SUB_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            sub_inst* const inst_cream = (sub_inst*)inst_base->component;
+
+            u32 rn_val = RN;
+            if (inst_cream->Rn == 15)
+                rn_val += 2 * cpu->GetInstructionSize();
+
+            bool carry;
+            bool overflow;
+            RD = AddWithCarry(rn_val, ~SHIFTER_OPERAND, 1, &carry, &overflow);
+
+            if (inst_cream->S && (inst_cream->Rd == 15)) {
+                if (CurrentModeHasSPSR) {
+                    cpu->Cpsr = cpu->Spsr_copy;
+                    cpu->ChangePrivilegeMode(cpu->Spsr_copy & 0x1F);
+                    LOAD_NZCVT;
+                }
+            } else if (inst_cream->S) {
+                UPDATE_NFLAG(RD);
+                UPDATE_ZFLAG(RD);
+                cpu->CFlag = carry;
+                cpu->VFlag = overflow;
+            }
+            if (inst_cream->Rd == 15) {
+                INC_PC(sizeof(sub_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(sub_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    SWI_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            swi_inst* const inst_cream = (swi_inst*)inst_base->component;
+            (*cpu->user_callbacks.CallSVC)(inst_cream->num & 0xFFFF);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(swi_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    SWP_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            swp_inst* inst_cream = (swp_inst*)inst_base->component;
+
+            addr = RN;
+            unsigned int value = cpu->ReadMemory32(addr);
+            cpu->WriteMemory32(addr, RM);
+
+            RD = value;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(swp_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    SWPB_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            swp_inst* inst_cream = (swp_inst*)inst_base->component;
+            addr = RN;
+            unsigned int value = cpu->ReadMemory8(addr);
+            cpu->WriteMemory8(addr, (RM & 0xFF));
+            RD = value;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(swp_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    SXTAB_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            sxtab_inst* inst_cream = (sxtab_inst*)inst_base->component;
+            // SXTAB: take the low byte of Rm rotated right by 8 * rotate bits ...
+            unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xff;
+
+            // ... sign-extend that byte to 32 bits and add it to Rn.
+            operand2 = (0x80 & operand2)? (0xFFFFFF00 | operand2):operand2;
+            RD = RN + operand2;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(sxtab_inst)); // same struct type as the inst_cream cast above
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SXTAB16_INST:
+    SXTB16_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            sxtab_inst* const inst_cream = (sxtab_inst*)inst_base->component;
+
+            const u8 rotation = inst_cream->rotate * 8;
+            u32 rm_val = RM;
+            u32 rn_val = RN;
+
+            if (rotation) // skip when 0: a 32-bit shift by (32 - 0) would be undefined
+                rm_val = ((rm_val << (32 - rotation)) | (rm_val >> rotation));
+
+            // SXTB16: sign-extend bytes 0 and 2 of the rotated Rm into the two halfwords of Rd
+            if (inst_cream->Rn == 15) {
+                u32 lo = (u32)(s8)rm_val;
+                u32 hi = (u32)(s8)(rm_val >> 16);
+                RD = ((lo & 0xFFFF) | (hi << 16)); // mask: lo carries sign bits above bit 15
+            }
+            // SXTAB16: as SXTB16, but each sign-extended byte is added to the matching halfword of Rn
+            else {
+                u32 lo = (rn_val & 0xFFFF) + (u32)(s8)(rm_val & 0xFF);
+                u32 hi = ((rn_val >> 16) & 0xFFFF) + (u32)(s8)((rm_val >> 16) & 0xFF);
+                RD = ((lo & 0xFFFF) | (hi << 16)); // mask: drop carry/sign bits above bit 15 of lo
+            }
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(sxtab_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SXTAH_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            sxtah_inst* inst_cream = (sxtah_inst*)inst_base->component;
+
+            unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xffff;
+            // Sign extend for half
+            operand2 = (0x8000 & operand2) ? (0xFFFF0000 | operand2) : operand2;
+            RD = RN + operand2;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(sxtah_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    TEQ_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            teq_inst* const inst_cream = (teq_inst*)inst_base->component;
+
+            u32 lop = RN;
+            u32 rop = SHIFTER_OPERAND;
+
+            if (inst_cream->Rn == 15)
+                lop += cpu->GetInstructionSize() * 2;
+
+            u32 result = lop ^ rop;
+
+            UPDATE_NFLAG(result);
+            UPDATE_ZFLAG(result);
+            UPDATE_CFLAG_WITH_SC;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(teq_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    TST_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            tst_inst* const inst_cream = (tst_inst*)inst_base->component;
+
+            u32 lop = RN;
+            u32 rop = SHIFTER_OPERAND;
+
+            if (inst_cream->Rn == 15)
+                lop += cpu->GetInstructionSize() * 2;
+
+            u32 result = lop & rop;
+
+            UPDATE_NFLAG(result);
+            UPDATE_ZFLAG(result);
+            UPDATE_CFLAG_WITH_SC;
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(tst_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    UADD8_INST:
+    UADD16_INST:
+    UADDSUBX_INST:
+    USUB8_INST:
+    USUB16_INST:
+    USUBADDX_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+
+            const u8 op2 = inst_cream->op2;
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+
+            s32 lo_result = 0;
+            s32 hi_result = 0;
+
+            // UADD16
+            if (op2 == 0x00) {
+                lo_result = (rn_val & 0xFFFF) + (rm_val & 0xFFFF);
+                hi_result = ((rn_val >> 16) & 0xFFFF) + ((rm_val >> 16) & 0xFFFF);
+
+                if (lo_result & 0xFFFF0000) {
+                    cpu->Cpsr |= (1 << 16);
+                    cpu->Cpsr |= (1 << 17);
+                } else {
+                    cpu->Cpsr &= ~(1 << 16);
+                    cpu->Cpsr &= ~(1 << 17);
+                }
+
+                if (hi_result & 0xFFFF0000) {
+                    cpu->Cpsr |= (1 << 18);
+                    cpu->Cpsr |= (1 << 19);
+                } else {
+                    cpu->Cpsr &= ~(1 << 18);
+                    cpu->Cpsr &= ~(1 << 19);
+                }
+            }
+            // UASX
+            else if (op2 == 0x01) {
+                lo_result = (rn_val & 0xFFFF) - ((rm_val >> 16) & 0xFFFF);
+                hi_result = ((rn_val >> 16) & 0xFFFF) + (rm_val & 0xFFFF);
+
+                if (lo_result >= 0) {
+                    cpu->Cpsr |= (1 << 16);
+                    cpu->Cpsr |= (1 << 17);
+                } else {
+                    cpu->Cpsr &= ~(1 << 16);
+                    cpu->Cpsr &= ~(1 << 17);
+                }
+
+                if (hi_result >= 0x10000) {
+                    cpu->Cpsr |= (1 << 18);
+                    cpu->Cpsr |= (1 << 19);
+                } else {
+                    cpu->Cpsr &= ~(1 << 18);
+                    cpu->Cpsr &= ~(1 << 19);
+                }
+            }
+            // USAX
+            else if (op2 == 0x02) {
+                lo_result = (rn_val & 0xFFFF) + ((rm_val >> 16) & 0xFFFF);
+                hi_result = ((rn_val >> 16) & 0xFFFF) - (rm_val & 0xFFFF);
+
+                if (lo_result >= 0x10000) {
+                    cpu->Cpsr |= (1 << 16);
+                    cpu->Cpsr |= (1 << 17);
+                } else {
+                    cpu->Cpsr &= ~(1 << 16);
+                    cpu->Cpsr &= ~(1 << 17);
+                }
+
+                if (hi_result >= 0) {
+                    cpu->Cpsr |= (1 << 18);
+                    cpu->Cpsr |= (1 << 19);
+                } else {
+                    cpu->Cpsr &= ~(1 << 18);
+                    cpu->Cpsr &= ~(1 << 19);
+                }
+            }
+            // USUB16
+            else if (op2 == 0x03) {
+                lo_result = (rn_val & 0xFFFF) - (rm_val & 0xFFFF);
+                hi_result = ((rn_val >> 16) & 0xFFFF) - ((rm_val >> 16) & 0xFFFF);
+
+                if ((lo_result & 0xFFFF0000) == 0) {
+                    cpu->Cpsr |= (1 << 16);
+                    cpu->Cpsr |= (1 << 17);
+                } else {
+                    cpu->Cpsr &= ~(1 << 16);
+                    cpu->Cpsr &= ~(1 << 17);
+                }
+
+                if ((hi_result & 0xFFFF0000) == 0) {
+                    cpu->Cpsr |= (1 << 18);
+                    cpu->Cpsr |= (1 << 19);
+                } else {
+                    cpu->Cpsr &= ~(1 << 18);
+                    cpu->Cpsr &= ~(1 << 19);
+                }
+            }
+            // UADD8
+            else if (op2 == 0x04) {
+                s16 sum1 = (rn_val & 0xFF) + (rm_val & 0xFF);
+                s16 sum2 = ((rn_val >> 8) & 0xFF) + ((rm_val >> 8) & 0xFF);
+                s16 sum3 = ((rn_val >> 16) & 0xFF) + ((rm_val >> 16) & 0xFF);
+                s16 sum4 = ((rn_val >> 24) & 0xFF) + ((rm_val >> 24) & 0xFF);
+
+                if (sum1 >= 0x100)
+                    cpu->Cpsr |= (1 << 16);
+                else
+                    cpu->Cpsr &= ~(1 << 16);
+
+                if (sum2 >= 0x100)
+                    cpu->Cpsr |= (1 << 17);
+                else
+                    cpu->Cpsr &= ~(1 << 17);
+
+                if (sum3 >= 0x100)
+                    cpu->Cpsr |= (1 << 18);
+                else
+                    cpu->Cpsr &= ~(1 << 18);
+
+                if (sum4 >= 0x100)
+                    cpu->Cpsr |= (1 << 19);
+                else
+                    cpu->Cpsr &= ~(1 << 19);
+
+                lo_result = ((sum1 & 0xFF) | (sum2 & 0xFF) << 8);
+                hi_result = ((sum3 & 0xFF) | (sum4 & 0xFF) << 8);
+            }
+            // USUB8
+            else if (op2 == 0x07) {
+                s16 diff1 = (rn_val & 0xFF) - (rm_val & 0xFF);
+                s16 diff2 = ((rn_val >> 8) & 0xFF) - ((rm_val >> 8) & 0xFF);
+                s16 diff3 = ((rn_val >> 16) & 0xFF) - ((rm_val >> 16) & 0xFF);
+                s16 diff4 = ((rn_val >> 24) & 0xFF) - ((rm_val >> 24) & 0xFF);
+
+                if (diff1 >= 0)
+                    cpu->Cpsr |= (1 << 16);
+                else
+                    cpu->Cpsr &= ~(1 << 16);
+
+                if (diff2 >= 0)
+                    cpu->Cpsr |= (1 << 17);
+                else
+                    cpu->Cpsr &= ~(1 << 17);
+
+                if (diff3 >= 0)
+                    cpu->Cpsr |= (1 << 18);
+                else
+                    cpu->Cpsr &= ~(1 << 18);
+
+                if (diff4 >= 0)
+                    cpu->Cpsr |= (1 << 19);
+                else
+                    cpu->Cpsr &= ~(1 << 19);
+
+                lo_result = (diff1 & 0xFF) | ((diff2 & 0xFF) << 8);
+                hi_result = (diff3 & 0xFF) | ((diff4 & 0xFF) << 8);
+            }
+
+            RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    UHADD8_INST:
+    UHADD16_INST:
+    UHADDSUBX_INST:
+    UHSUBADDX_INST:
+    UHSUB8_INST:
+    UHSUB16_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+            const u8 op2 = inst_cream->op2;
+
+            if (op2 == 0x00 || op2 == 0x01 || op2 == 0x02 || op2 == 0x03)
+            {
+                u32 lo_val = 0;
+                u32 hi_val = 0;
+
+                // UHADD16
+                if (op2 == 0x00) {
+                    lo_val = (rn_val & 0xFFFF) + (rm_val & 0xFFFF);
+                    hi_val = ((rn_val >> 16) & 0xFFFF) + ((rm_val >> 16) & 0xFFFF);
+                }
+                // UHASX
+                else if (op2 == 0x01) {
+                    lo_val = (rn_val & 0xFFFF) - ((rm_val >> 16) & 0xFFFF);
+                    hi_val = ((rn_val >> 16) & 0xFFFF) + (rm_val & 0xFFFF);
+                }
+                // UHSAX
+                else if (op2 == 0x02) {
+                    lo_val = (rn_val & 0xFFFF) + ((rm_val >> 16) & 0xFFFF);
+                    hi_val = ((rn_val >> 16) & 0xFFFF) - (rm_val & 0xFFFF);
+                }
+                // UHSUB16
+                else if (op2 == 0x03) {
+                    lo_val = (rn_val & 0xFFFF) - (rm_val & 0xFFFF);
+                    hi_val = ((rn_val >> 16) & 0xFFFF) - ((rm_val >> 16) & 0xFFFF);
+                }
+
+                lo_val >>= 1;
+                hi_val >>= 1;
+
+                RD = (lo_val & 0xFFFF) | ((hi_val & 0xFFFF) << 16);
+            }
+            else if (op2 == 0x04 || op2 == 0x07) {
+                u32 sum1;
+                u32 sum2;
+                u32 sum3;
+                u32 sum4;
+
+                // UHADD8
+                if (op2 == 0x04) {
+                    sum1 = (rn_val & 0xFF) + (rm_val & 0xFF);
+                    sum2 = ((rn_val >> 8) & 0xFF) + ((rm_val >> 8) & 0xFF);
+                    sum3 = ((rn_val >> 16) & 0xFF) + ((rm_val >> 16) & 0xFF);
+                    sum4 = ((rn_val >> 24) & 0xFF) + ((rm_val >> 24) & 0xFF);
+                }
+                // UHSUB8
+                else {
+                    sum1 = (rn_val & 0xFF) - (rm_val & 0xFF);
+                    sum2 = ((rn_val >> 8) & 0xFF) - ((rm_val >> 8) & 0xFF);
+                    sum3 = ((rn_val >> 16) & 0xFF) - ((rm_val >> 16) & 0xFF);
+                    sum4 = ((rn_val >> 24) & 0xFF) - ((rm_val >> 24) & 0xFF);
+                }
+
+                sum1 >>= 1;
+                sum2 >>= 1;
+                sum3 >>= 1;
+                sum4 >>= 1;
+
+                RD = (sum1 & 0xFF) | ((sum2 & 0xFF) << 8) | ((sum3 & 0xFF) << 16) | ((sum4 & 0xFF) << 24);
+            }
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    UMAAL_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            umaal_inst* const inst_cream = (umaal_inst*)inst_base->component;
+            const u64 rm = RM;
+            const u64 rn = RN;
+            const u64 rd_lo = RDLO;
+            const u64 rd_hi = RDHI;
+            const u64 result = (rm * rn) + rd_lo + rd_hi;
+
+            RDLO = (result & 0xFFFFFFFF);
+            RDHI = ((result >> 32) & 0xFFFFFFFF);
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(umaal_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    UMLAL_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            umlal_inst* inst_cream = (umlal_inst*)inst_base->component;
+            unsigned long long int rm = RM;
+            unsigned long long int rs = RS;
+            unsigned long long int rst = rm * rs;
+            unsigned long long int add = ((unsigned long long) RDHI)<<32;
+            add += RDLO;
+            rst += add;
+            RDLO = BITS(rst,  0, 31);
+            RDHI = BITS(rst, 32, 63);
+
+            if (inst_cream->S) {
+                cpu->NFlag = BIT(RDHI, 31);
+                cpu->ZFlag = (RDHI == 0 && RDLO == 0);
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(umlal_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    UMULL_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            umull_inst* inst_cream = (umull_inst*)inst_base->component;
+            unsigned long long int rm = RM;
+            unsigned long long int rs = RS;
+            unsigned long long int rst = rm * rs;
+            RDHI = BITS(rst, 32, 63);
+            RDLO = BITS(rst,  0, 31);
+
+            if (inst_cream->S) {
+                cpu->NFlag = BIT(RDHI, 31);
+                cpu->ZFlag = (RDHI == 0 && RDLO == 0);
+            }
+        }
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(umull_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    B_2_THUMB:
+    {
+        b_2_thumb* inst_cream = (b_2_thumb*)inst_base->component;
+        cpu->Reg[15] = cpu->Reg[15] + 4 + inst_cream->imm;
+        INC_PC(sizeof(b_2_thumb));
+        goto DISPATCH;
+    }
+    B_COND_THUMB:
+    {
+        b_cond_thumb* inst_cream = (b_cond_thumb*)inst_base->component;
+
+        if(CondPassed(cpu, inst_cream->cond))
+            cpu->Reg[15] = cpu->Reg[15] + 4 + inst_cream->imm;
+        else
+            cpu->Reg[15] += 2;
+
+        INC_PC(sizeof(b_cond_thumb));
+        goto DISPATCH;
+    }
+    BL_1_THUMB:
+    {
+        bl_1_thumb* inst_cream = (bl_1_thumb*)inst_base->component;
+        cpu->Reg[14] = cpu->Reg[15] + 4 + inst_cream->imm;
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(bl_1_thumb));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    BL_2_THUMB:
+    {
+        bl_2_thumb* inst_cream = (bl_2_thumb*)inst_base->component;
+        int tmp = ((cpu->Reg[15] + 2) | 1);
+        cpu->Reg[15] = (cpu->Reg[14] + inst_cream->imm);
+        cpu->Reg[14] = tmp;
+        INC_PC(sizeof(bl_2_thumb));
+        goto DISPATCH;
+    }
+    BLX_1_THUMB:
+    {
+        // BLX 1 for armv5t and above
+        u32 tmp = cpu->Reg[15];
+        blx_1_thumb* inst_cream = (blx_1_thumb*)inst_base->component;
+        cpu->Reg[15] = (cpu->Reg[14] + inst_cream->imm) & 0xFFFFFFFC;
+        cpu->Reg[14] = ((tmp + 2) | 1);
+        cpu->TFlag = 0;
+        INC_PC(sizeof(blx_1_thumb));
+        goto DISPATCH;
+    }
+
+    UQADD8_INST:
+    UQADD16_INST:
+    UQADDSUBX_INST:
+    UQSUB8_INST:
+    UQSUB16_INST:
+    UQSUBADDX_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+
+            const u8 op2 = inst_cream->op2;
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+
+            u16 lo_val = 0;
+            u16 hi_val = 0;
+
+            // UQADD16
+            if (op2 == 0x00) {
+                lo_val = ARMul_UnsignedSaturatedAdd16(rn_val & 0xFFFF, rm_val & 0xFFFF);
+                hi_val = ARMul_UnsignedSaturatedAdd16((rn_val >> 16) & 0xFFFF, (rm_val >> 16) & 0xFFFF);
+            }
+            // UQASX
+            else if (op2 == 0x01) {
+                lo_val = ARMul_UnsignedSaturatedSub16(rn_val & 0xFFFF, (rm_val >> 16) & 0xFFFF);
+                hi_val = ARMul_UnsignedSaturatedAdd16((rn_val >> 16) & 0xFFFF, rm_val & 0xFFFF);
+            }
+            // UQSAX
+            else if (op2 == 0x02) {
+                lo_val = ARMul_UnsignedSaturatedAdd16(rn_val & 0xFFFF, (rm_val >> 16) & 0xFFFF);
+                hi_val = ARMul_UnsignedSaturatedSub16((rn_val >> 16) & 0xFFFF, rm_val & 0xFFFF);
+            }
+            // UQSUB16
+            else if (op2 == 0x03) {
+                lo_val = ARMul_UnsignedSaturatedSub16(rn_val & 0xFFFF, rm_val & 0xFFFF);
+                hi_val = ARMul_UnsignedSaturatedSub16((rn_val >> 16) & 0xFFFF, (rm_val >> 16) & 0xFFFF);
+            }
+            // UQADD8
+            else if (op2 == 0x04) {
+                lo_val = ARMul_UnsignedSaturatedAdd8(rn_val, rm_val) |
+                         ARMul_UnsignedSaturatedAdd8(rn_val >> 8,  rm_val >> 8) << 8;
+                hi_val = ARMul_UnsignedSaturatedAdd8(rn_val >> 16, rm_val >> 16) |
+                         ARMul_UnsignedSaturatedAdd8(rn_val >> 24, rm_val >> 24) << 8;
+            }
+            // UQSUB8
+            else {
+                lo_val = ARMul_UnsignedSaturatedSub8(rn_val, rm_val) |
+                         ARMul_UnsignedSaturatedSub8(rn_val >> 8,  rm_val >> 8) << 8;
+                hi_val = ARMul_UnsignedSaturatedSub8(rn_val >> 16, rm_val >> 16) |
+                         ARMul_UnsignedSaturatedSub8(rn_val >> 24, rm_val >> 24) << 8;
+            }
+
+            RD = ((lo_val & 0xFFFF) | hi_val << 16);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    USAD8_INST:
+    USADA8_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+
+            const u8 ra_idx = inst_cream->Ra;
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+
+            const u8 diff1 = ARMul_UnsignedAbsoluteDifference(rn_val & 0xFF, rm_val & 0xFF);
+            const u8 diff2 = ARMul_UnsignedAbsoluteDifference((rn_val >> 8) & 0xFF, (rm_val >> 8) & 0xFF);
+            const u8 diff3 = ARMul_UnsignedAbsoluteDifference((rn_val >> 16) & 0xFF, (rm_val >> 16) & 0xFF);
+            const u8 diff4 = ARMul_UnsignedAbsoluteDifference((rn_val >> 24) & 0xFF, (rm_val >> 24) & 0xFF);
+
+            u32 finalDif = (diff1 + diff2 + diff3 + diff4);
+
+            // Op is USADA8 if true.
+            if (ra_idx != 15)
+                finalDif += cpu->Reg[ra_idx];
+
+            RD = finalDif;
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    USAT_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ssat_inst* const inst_cream = (ssat_inst*)inst_base->component;
+
+            u8 shift_type = inst_cream->shift_type;
+            u8 shift_amount = inst_cream->imm5;
+            u32 rn_val = RN;
+
+            // 32-bit ASR is encoded as an amount of 0.
+            if (shift_type == 1 && shift_amount == 0)
+                shift_amount = 31;
+
+            if (shift_type == 0)
+                rn_val <<= shift_amount;
+            else if (shift_type == 1)
+                rn_val = ((s32)rn_val >> shift_amount);
+
+            bool saturated = false;
+            rn_val = ARMul_UnsignedSatQ(rn_val, inst_cream->sat_imm, &saturated);
+
+            if (saturated)
+                cpu->Cpsr |= (1 << 27);
+
+            RD = rn_val;
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ssat_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    USAT16_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            ssat_inst* const inst_cream = (ssat_inst*)inst_base->component;
+            const u8 saturate_to = inst_cream->sat_imm;
+
+            bool sat1 = false;
+            bool sat2 = false;
+
+            RD = (ARMul_UnsignedSatQ((s16)RN, saturate_to, &sat1) & 0xFFFF) |
+                 ARMul_UnsignedSatQ((s32)RN >> 16, saturate_to, &sat2) << 16;
+
+            if (sat1 || sat2)
+                cpu->Cpsr |= (1 << 27);
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(ssat_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    UXTAB16_INST:
+    UXTB16_INST:
+    {
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            uxtab_inst* const inst_cream = (uxtab_inst*)inst_base->component;
+
+            const u8 rn_idx = inst_cream->Rn;
+            const u32 rm_val = RM;
+            const u32 rotation = inst_cream->rotate * 8;
+            const u32 rotated_rm = rotation ? ((rm_val << (32 - rotation)) | (rm_val >> rotation)) : rm_val;
+            // (rotation == 0 is special-cased above: shifting a u32 by 32 bits is undefined behaviour)
+            // UXTB16, otherwise UXTAB16
+            if (rn_idx == 15) {
+                RD = rotated_rm & 0x00FF00FF;
+            } else {
+                const u32 rn_val = RN;
+                const u8 lo_rotated = (rotated_rm & 0xFF);
+                const u16 lo_result = (rn_val & 0xFFFF) + (u16)lo_rotated;
+                const u8 hi_rotated = (rotated_rm >> 16) & 0xFF;
+                const u16 hi_result = (rn_val >> 16) + (u16)hi_rotated;
+
+                RD = ((hi_result << 16) | (lo_result & 0xFFFF));
+            }
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC(sizeof(uxtab_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    WFE_INST:
+    {
+        // Stubbed, as WFE is a hint instruction.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            LOG_TRACE(Core_ARM11, "WFE executed.");
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC_STUB;
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    WFI_INST:
+    {
+        // Stubbed, as WFI is a hint instruction.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            LOG_TRACE(Core_ARM11, "WFI executed.");
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC_STUB;
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    YIELD_INST:
+    {
+        // Stubbed, as YIELD is a hint instruction.
+        if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+            LOG_TRACE(Core_ARM11, "YIELD executed.");
+        }
+
+        cpu->Reg[15] += cpu->GetInstructionSize();
+        INC_PC_STUB;
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    #define VFP_INTERPRETER_IMPL
+    #include "tests/skyeye_interpreter/skyeye_common/vfp/vfpinstr.cpp"
+    #undef VFP_INTERPRETER_IMPL
+
+    END:
+    {
+        SAVE_NZCVT;
+        cpu->NumInstrsToExecute = 0;
+        return num_instrs;
+    }
+    INIT_INST_LENGTH:
+    {
+        cpu->NumInstrsToExecute = 0;
+        return num_instrs;
+    }
+}
diff --git a/tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.h b/tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.h
new file mode 100644
index 00000000..5d5b2d52
--- /dev/null
+++ b/tests/skyeye_interpreter/dyncom/arm_dyncom_interpreter.h
@@ -0,0 +1,10 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+struct ARMul_State;
+
+unsigned InterpreterMainLoop(ARMul_State* state);
+void InterpreterClearCache();
diff --git a/tests/skyeye_interpreter/dyncom/arm_dyncom_run.h b/tests/skyeye_interpreter/dyncom/arm_dyncom_run.h
new file mode 100644
index 00000000..5e0eb5f2
--- /dev/null
+++ b/tests/skyeye_interpreter/dyncom/arm_dyncom_run.h
@@ -0,0 +1,48 @@
+/* Copyright (C)
+* 2011 - Michael.Kang blackfin.kang@gmail.com
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+*
+*/
+
+#pragma once
+
+#include "tests/skyeye_interpreter/skyeye_common/armstate.h"
+
+/**
+ * Reads a register for use in an address calculation.
+ *
+ * @param cpu The ARM CPU state instance.
+ * @param Rn  Index of the register to read.
+ * @return For Rn == 15, the PC with its low two bits cleared plus twice the current
+ *         instruction size. For any other index, the register's stored value.
+ */
+inline u32 CHECK_READ_REG15_WA(const ARMul_State* cpu, int Rn) {
+    if (Rn != 15)
+        return cpu->Reg[Rn];
+    return (cpu->Reg[15] & ~0x3) + cpu->GetInstructionSize() * 2;
+}
+
+/**
+ * Reads a register for use as a data processing operand.
+ *
+ * @param cpu The ARM CPU state instance.
+ * @param Rn  Index of the register to read.
+ *
+ * @return For Rn == 15, the PC with bit 0 cleared plus twice the current instruction size.
+ *         For any other index, the register's stored value.
+ */
+inline u32 CHECK_READ_REG15(const ARMul_State* cpu, int Rn) {
+    return (Rn != 15) ? cpu->Reg[Rn] : ((cpu->Reg[15] & ~0x1) + cpu->GetInstructionSize() * 2);
+}
diff --git a/tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.cpp b/tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.cpp
new file mode 100644
index 00000000..a2c1f9e9
--- /dev/null
+++ b/tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.cpp
@@ -0,0 +1,393 @@
+// Copyright 2012 Michael Kang, 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// We can provide simple Thumb simulation by decoding the Thumb instruction into its corresponding
+// ARM instruction, and using the existing ARM simulator.
+
+#include "tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.h"
+#include "tests/skyeye_interpreter/skyeye_common/armsupp.h"
+
+// Translates one 16-bit Thumb instruction into an equivalent 32-bit ARM encoding.
+// addr selects which halfword of instr is decoded (see GetThumbInstruction), *inst_size is
+// always set to 2, and *ainstr receives the ARM encoding or stays at the 0xDEADC0DE marker
+// when none is produced. Branch encodings return BRANCH and rejected encodings return UNDEFINED
+// without producing an ARM encoding; every other instruction returns the initial UNINITIALIZED
+// status (DECODED is never returned).
+
+ThumbDecodeStatus TranslateThumbInstruction(u32 addr, u32 instr, u32* ainstr, u32* inst_size) {
+    ThumbDecodeStatus valid = ThumbDecodeStatus::UNINITIALIZED;
+    u32 tinstr = GetThumbInstruction(instr, addr);
+    *ainstr = 0xDEADC0DE; // Debugging to catch non updates
+
+    switch ((tinstr & 0xF800) >> 11) {
+    case 0: // LSL
+    case 1: // LSR
+    case 2: // ASR
+        *ainstr = 0xE1B00000                    // base opcode
+            | ((tinstr & 0x1800) >> (11 - 5))   // shift type
+            |((tinstr & 0x07C0) << (7 - 6))     // imm5
+            |((tinstr & 0x0038) >> 3)           // Rs
+            |((tinstr & 0x0007) << 12);         // Rd
+        break;
+
+    case 3: // ADD/SUB
+        {
+            static const u32 subset[4] = {
+                0xE0900000,     // ADDS Rd,Rs,Rn
+                0xE0500000,     // SUBS Rd,Rs,Rn
+                0xE2900000,     // ADDS Rd,Rs,#imm3
+                0xE2500000      // SUBS Rd,Rs,#imm3
+            };
+            // It is quicker indexing into a table, than performing switch or conditionals:
+            *ainstr = subset[(tinstr & 0x0600) >> 9]    // base opcode
+                |((tinstr & 0x01C0) >> 6)               // Rn or imm3
+                |((tinstr & 0x0038) << (16 - 3))        // Rs
+                |((tinstr & 0x0007) << (12 - 0));       // Rd
+        }
+        break;
+
+    case 4: // MOV
+    case 5: // CMP
+    case 6: // ADD
+    case 7: // SUB
+        {
+            static const u32 subset[4] = {
+                0xE3B00000,     // MOVS Rd,#imm8
+                0xE3500000,     // CMP  Rd,#imm8
+                0xE2900000,     // ADDS Rd,Rd,#imm8
+                0xE2500000,     // SUBS Rd,Rd,#imm8
+            };
+
+            *ainstr = subset[(tinstr & 0x1800) >> 11]   // base opcode
+                |((tinstr & 0x00FF) >> 0)               // imm8
+                |((tinstr & 0x0700) << (16 - 8))        // Rn
+                |((tinstr & 0x0700) << (12 - 8));       // Rd
+        }
+        break;
+
+    case 8: // Arithmetic and high register transfers
+        // TODO: Since the subsets for both Format 4 and Format 5 instructions are made up of
+        // different ARM encodings, we could save the following conditional, and just have one
+        // large subset
+        if ((tinstr & (1 << 10)) == 0) {
+            enum otype {
+                t_norm,
+                t_shift,
+                t_neg,
+                t_mul
+            };
+
+            static const struct {
+                u32 opcode;
+                otype type;
+            } subset[16] = {
+                { 0xE0100000, t_norm },     // ANDS Rd,Rd,Rs
+                { 0xE0300000, t_norm },     // EORS Rd,Rd,Rs
+                { 0xE1B00010, t_shift },    // MOVS Rd,Rd,LSL Rs
+                { 0xE1B00030, t_shift },    // MOVS Rd,Rd,LSR Rs
+                { 0xE1B00050, t_shift },    // MOVS Rd,Rd,ASR Rs
+                { 0xE0B00000, t_norm },     // ADCS Rd,Rd,Rs
+                { 0xE0D00000, t_norm },     // SBCS Rd,Rd,Rs
+                { 0xE1B00070, t_shift },    // MOVS Rd,Rd,ROR Rs
+                { 0xE1100000, t_norm },     // TST  Rd,Rs
+                { 0xE2700000, t_neg },      // RSBS Rd,Rs,#0
+                { 0xE1500000, t_norm },     // CMP  Rd,Rs
+                { 0xE1700000, t_norm },     // CMN  Rd,Rs
+                { 0xE1900000, t_norm },     // ORRS Rd,Rd,Rs
+                { 0xE0100090, t_mul },      // MULS Rd,Rd,Rs
+                { 0xE1D00000, t_norm },     // BICS Rd,Rd,Rs
+                { 0xE1F00000, t_norm }      // MVNS Rd,Rs
+            };
+
+            *ainstr = subset[(tinstr & 0x03C0) >> 6].opcode; // base
+
+            switch (subset[(tinstr & 0x03C0) >> 6].type) {
+            case t_norm:
+                *ainstr |= ((tinstr & 0x0007) << 16)    // Rn
+                    |((tinstr & 0x0007) << 12)          // Rd
+                    |((tinstr & 0x0038) >> 3);          // Rs
+                break;
+            case t_shift:
+                *ainstr |= ((tinstr & 0x0007) << 12)    // Rd
+                    |((tinstr & 0x0007) >> 0)           // Rm
+                    |((tinstr & 0x0038) << (8 - 3));    // Rs
+                break;
+            case t_neg:
+                *ainstr |= ((tinstr & 0x0007) << 12)    // Rd
+                    |((tinstr & 0x0038) << (16 - 3));   // Rn
+                break;
+            case t_mul:
+                *ainstr |= ((tinstr & 0x0007) << 16)    // Rd
+                    |((tinstr & 0x0007) << 8)           // Rs
+                    |((tinstr & 0x0038) >> 3);          // Rm
+                break;
+            }
+        } else {
+            u32 Rd = ((tinstr & 0x0007) >> 0);
+            u32 Rs = ((tinstr & 0x0078) >> 3);
+
+            if (tinstr & (1 << 7))
+                Rd += 8;
+
+            switch ((tinstr & 0x03C0) >> 6) {
+            case 0x0:                           // ADD Rd,Rd,Rs
+            case 0x1:                           // ADD Rd,Rd,Hs
+            case 0x2:                           // ADD Hd,Hd,Rs
+            case 0x3:                           // ADD Hd,Hd,Hs
+                *ainstr = 0xE0800000            // base
+                    | (Rd << 16)                // Rn
+                    |(Rd << 12)                 // Rd
+                    |(Rs << 0);                 // Rm
+                break;
+            case 0x4:                           // CMP Rd,Rs
+            case 0x5:                           // CMP Rd,Hs
+            case 0x6:                           // CMP Hd,Rs
+            case 0x7:                           // CMP Hd,Hs
+                *ainstr = 0xE1500000            // base
+                    | (Rd << 16)                // Rn
+                    |(Rs << 0);                 // Rm
+                break;
+            case 0x8:                           // MOV Rd,Rs
+            case 0x9:                           // MOV Rd,Hs
+            case 0xA:                           // MOV Hd,Rs
+            case 0xB:                           // MOV Hd,Hs
+                *ainstr = 0xE1A00000            // base
+                    |(Rd << 12)                 // Rd
+                    |(Rs << 0);                 // Rm
+                break;
+            case 0xC:                           // BX Rs
+            case 0xD:                           // BX Hs
+                *ainstr = 0xE12FFF10            // base
+                    | (Rs << 0);                // Rm
+                break;
+            case 0xE:                           // BLX
+            case 0xF:                           // BLX
+                *ainstr = 0xE1200030            // base
+                    | (Rs << 0);                // Rm
+                break;
+            }
+        }
+        break;
+
+    case 9: // LDR Rd,[PC,#imm8]
+        *ainstr = 0xE59F0000                    // base
+            | ((tinstr & 0x0700) << (12 - 8))   // Rd
+            |((tinstr & 0x00FF) << (2 - 0));    // off8
+        break;
+
+    case 10:
+    case 11:
+        {
+            static const u32 subset[8] = {
+                0xE7800000, // STR   Rd,[Rb,Ro]
+                0xE18000B0, // STRH  Rd,[Rb,Ro]
+                0xE7C00000, // STRB  Rd,[Rb,Ro]
+                0xE19000D0, // LDRSB Rd,[Rb,Ro]
+                0xE7900000, // LDR   Rd,[Rb,Ro]
+                0xE19000B0, // LDRH  Rd,[Rb,Ro]
+                0xE7D00000, // LDRB  Rd,[Rb,Ro]
+                0xE19000F0  // LDRSH Rd,[Rb,Ro]
+            };
+
+            *ainstr = subset[(tinstr & 0xE00) >> 9] // base
+                |((tinstr & 0x0007) << (12 - 0))    // Rd
+                |((tinstr & 0x0038) << (16 - 3))    // Rb
+                |((tinstr & 0x01C0) >> 6);          // Ro
+        }
+        break;
+
+    case 12: // STR Rd,[Rb,#imm5]
+    case 13: // LDR Rd,[Rb,#imm5]
+    case 14: // STRB Rd,[Rb,#imm5]
+    case 15: // LDRB Rd,[Rb,#imm5]
+        {
+            static const u32 subset[4] = {
+                0xE5800000,     // STR  Rd,[Rb,#imm5]
+                0xE5900000,     // LDR  Rd,[Rb,#imm5]
+                0xE5C00000,     // STRB Rd,[Rb,#imm5]
+                0xE5D00000      // LDRB Rd,[Rb,#imm5]
+            };
+            // The offset range depends on whether we are transferring a byte or word value:
+            *ainstr = subset[(tinstr & 0x1800) >> 11]   // base
+                |((tinstr & 0x0007) << (12 - 0))        // Rd
+                |((tinstr & 0x0038) << (16 - 3))        // Rb
+                |((tinstr & 0x07C0) >> (6 - ((tinstr & (1 << 12)) ? 0 : 2))); // off5
+        }
+        break;
+
+    case 16: // STRH Rd,[Rb,#imm5]
+    case 17: // LDRH Rd,[Rb,#imm5]
+        *ainstr = ((tinstr & (1 << 11))         // base
+               ? 0xE1D000B0                     // LDRH
+               : 0xE1C000B0)                    // STRH
+            |((tinstr & 0x0007) << (12 - 0))    // Rd
+            |((tinstr & 0x0038) << (16 - 3))    // Rb
+            |((tinstr & 0x01C0) >> (6 - 1))     // off5, low nibble
+            |((tinstr & 0x0600) >> (9 - 8));    // off5, high nibble
+        break;
+
+    case 18: // STR Rd,[SP,#imm8]
+    case 19: // LDR Rd,[SP,#imm8]
+        *ainstr = ((tinstr & (1 << 11))         // base
+               ? 0xE59D0000                     // LDR
+               : 0xE58D0000)                    // STR
+            |((tinstr & 0x0700) << (12 - 8))    // Rd
+            |((tinstr & 0x00FF) << 2);          // off8
+        break;
+
+    case 20: // ADD Rd,PC,#imm8
+    case 21: // ADD Rd,SP,#imm8
+        if ((tinstr & (1 << 11)) == 0) {
+            // NOTE: The PC value used here should be word aligned. We encode shift-left-by-2 in the
+            // rotate immediate field, so no shift of off8 is needed.
+            *ainstr = 0xE28F0F00                    // base
+                | ((tinstr & 0x0700) << (12 - 8))   // Rd
+                |(tinstr & 0x00FF);                 // off8
+        } else {
+            // We encode shift-left-by-2 in the rotate immediate field, so no shift of off8 is needed.
+            *ainstr = 0xE28D0F00                    // base
+                | ((tinstr & 0x0700) << (12 - 8))   // Rd
+                |(tinstr & 0x00FF);                 // off8
+        }
+        break;
+
+    case 22:
+    case 23:
+        if ((tinstr & 0x0F00) == 0x0000) {
+            // NOTE: The instruction contains a shift left of 2 equivalent (implemented as ROR #30):
+            *ainstr = ((tinstr & (1 << 7))  // base
+                   ? 0xE24DDF00             // SUB
+                   : 0xE28DDF00)            // ADD
+                |(tinstr & 0x007F);         // off7
+        } else if ((tinstr & 0x0F00) == 0x0e00) {
+            // BKPT
+            *ainstr = 0xEF000000              // base
+                | BITS(tinstr, 0, 3)          // imm4 field;
+                | (BITS(tinstr, 4, 7) << 8);  // beginning 4 bits of imm12
+        } else if ((tinstr & 0x0F00) == 0x0200) {
+            static const u32 subset[4] = {
+                0xE6BF0070, // SXTH
+                0xE6AF0070, // SXTB
+                0xE6FF0070, // UXTH
+                0xE6EF0070, // UXTB
+            };
+
+            *ainstr = subset[BITS(tinstr, 6, 7)] // base
+                | (BITS(tinstr, 0, 2) << 12)     // Rd
+                | BITS(tinstr, 3, 5);            // Rm
+        } else if ((tinstr & 0x0F00) == 0x600) {
+            if (BIT(tinstr, 5) == 0) {
+                // SETEND
+                *ainstr = 0xF1010000         // base
+                    | (BIT(tinstr, 3) << 9); // endian specifier
+            } else {
+                // CPS
+                *ainstr = 0xF1080000          // base
+                    | (BIT(tinstr, 0) << 6)   // fiq bit
+                    | (BIT(tinstr, 1) << 7)   // irq bit
+                    | (BIT(tinstr, 2) << 8)   // abort bit
+                    | (BIT(tinstr, 4) << 18); // enable bit
+            }
+        } else if ((tinstr & 0x0F00) == 0x0a00) {
+            // Bits 7:6 select the operation: 00 REV, 01 REV16, 11 REVSH; 10 is unallocated.
+            static const u32 subset[4] = {
+                0xE6BF0F30, // REV
+                0xE6BF0FB0, // REV16
+                0x00000000, // unallocated, rejected below
+                0xE6FF0FB0, // REVSH
+            };
+            const u32 op = BITS(tinstr, 6, 7);
+
+            if (op == 2) {
+                valid = ThumbDecodeStatus::UNDEFINED;
+            } else {
+                *ainstr = subset[op]             // base
+                    | (BITS(tinstr, 0, 2) << 12) // Rd
+                    | BITS(tinstr, 3, 5);        // Rm
+            }
+        } else {
+            static const u32 subset[4] = {
+                0xE92D0000, // STMDB sp!,{rlist}
+                0xE92D4000, // STMDB sp!,{rlist,lr}
+                0xE8BD0000, // LDMIA sp!,{rlist}
+                0xE8BD8000  // LDMIA sp!,{rlist,pc}
+            };
+            *ainstr = subset[((tinstr & (1 << 11)) >> 10) | ((tinstr & (1 << 8)) >> 8)] // base
+                |(tinstr & 0x00FF); // mask8
+        }
+        break;
+
+    case 24: //  STMIA
+    case 25: //  LDMIA
+        if (tinstr & (1 << 11))
+        {
+            unsigned int base = 0xE8900000;
+            unsigned int rn = BITS(tinstr, 8, 10);
+
+            // Writeback
+            if ((tinstr & (1 << rn)) == 0)
+                base |= (1 << 21);
+
+            *ainstr = base           // base (LDMIA)
+                | (rn << 16)         // Rn
+                | (tinstr & 0x00FF); // Register list
+        }
+        else
+        {
+            *ainstr = 0xE8A00000              // base (STMIA)
+                | (BITS(tinstr, 8, 10) << 16) // Rn
+                | (tinstr & 0x00FF);          // Register list
+        }
+        break;
+
+    case 26: // Bcc
+    case 27: // Bcc/SWI
+        if ((tinstr & 0x0F00) == 0x0F00) {
+            // Format 17 : SWI
+            *ainstr = 0xEF000000;
+            // Breakpoint must be handled specially.
+            if ((tinstr & 0x00FF) == 0x18)
+                *ainstr |= ((tinstr & 0x00FF) << 16);
+            // New breakpoint value.  See gdb/arm-tdep.c
+            else if ((tinstr & 0x00FF) == 0xFE)
+                *ainstr |= 0x180000; // base |= BKPT mask
+            else
+                *ainstr |= (tinstr & 0x00FF);
+        } else if ((tinstr & 0x0F00) != 0x0E00)
+            valid = ThumbDecodeStatus::BRANCH;
+        else //  UNDEFINED : cc=1110(AL) uses different format
+            valid = ThumbDecodeStatus::UNDEFINED;
+
+        break;
+
+    case 28: // B
+        valid = ThumbDecodeStatus::BRANCH;
+        break;
+
+    case 29:
+        if (tinstr & 0x1)
+            valid = ThumbDecodeStatus::UNDEFINED;
+        else
+            valid = ThumbDecodeStatus::BRANCH;
+        break;
+
+    case 30: // BL instruction 1
+        // There is no single ARM instruction equivalent for this Thumb instruction. To keep the
+        // simulation simple (from the user perspective) we check if the following instruction is
+        // the second half of this BL, and if it is we simulate it immediately
+        valid = ThumbDecodeStatus::BRANCH;
+        break;
+
+    case 31: // BL instruction 2
+        // There is no single ARM instruction equivalent for this instruction. Also, it should only
+        // ever be matched with the fmt19 "BL instruction 1" instruction. However, we do allow the
+        // simulation of it on its own, with undefined results if r14 is not suitably initialised.
+        valid = ThumbDecodeStatus::BRANCH;
+        break;
+    }
+
+    *inst_size = 2;
+
+    return valid;
+}
diff --git a/tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.h b/tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.h
new file mode 100644
index 00000000..c1be3c73
--- /dev/null
+++ b/tests/skyeye_interpreter/dyncom/arm_dyncom_thumb.h
@@ -0,0 +1,49 @@
+/* Copyright (C)
+* 2011 - Michael.Kang blackfin.kang@gmail.com
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+*
+*/
+
+/**
+* @file arm_dyncom_thumb.h
+* @brief The thumb dyncom
+* @author Michael.Kang blackfin.kang@gmail.com
+* @version 78.77
+* @date 2011-11-07
+*/
+
+#pragma once
+
+#include "common/common_types.h"
+
+enum class ThumbDecodeStatus { // Status returned by TranslateThumbInstruction
+    UNDEFINED,    // Undefined Thumb instruction
+    DECODED,      // Instruction decoded to ARM equivalent
+    BRANCH,       // Thumb branch (already processed)
+    UNINITIALIZED, // Value the translator's status starts with; NOTE(review): confirm how callers treat it
+};
+
+// Translates a Thumb mode instruction into its ARM equivalent.
+ThumbDecodeStatus TranslateThumbInstruction(u32 addr, u32 instr, u32* ainstr, u32* inst_size);
+
+inline u32 GetThumbInstruction(u32 instr, u32 address) {
+    // Instruction endianness would normally have to be considered here, but
+    // it is fixed to little-endian on the MPCore, so no check is made.
+    // The halfword is chosen by address alone: the low half of instr when the
+    // low two address bits are clear, the high half otherwise.
+    const bool word_aligned = (address & 0x3) == 0;
+
+    return word_aligned ? (instr & 0xFFFF) : (instr >> 16);
+}
diff --git a/tests/skyeye_interpreter/skyeye_common/arm_regformat.h b/tests/skyeye_interpreter/skyeye_common/arm_regformat.h
new file mode 100644
index 00000000..38fa97ab
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/arm_regformat.h
@@ -0,0 +1,187 @@
+#pragma once
+
+enum { // Register identifiers, numbered sequentially from R0 = 0
+    R0 = 0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R8,
+    R9,
+    R10,
+    R11,
+    R12,
+    R13,
+    LR, // Follows R13, so its value is 14
+    R15, // PC
+    CPSR_REG,
+    SPSR_REG,
+
+    PHYS_PC,
+    R13_USR,
+    R14_USR,
+    R13_SVC,
+    R14_SVC,
+    R13_ABORT,
+    R14_ABORT,
+    R13_UNDEF,
+    R14_UNDEF,
+    R13_IRQ,
+    R14_IRQ,
+    R8_FIRQ,
+    R9_FIRQ,
+    R10_FIRQ,
+    R11_FIRQ,
+    R12_FIRQ,
+    R13_FIRQ,
+    R14_FIRQ,
+    SPSR_INVALID1,
+    SPSR_INVALID2,
+    SPSR_SVC,
+    SPSR_ABORT,
+    SPSR_UNDEF,
+    SPSR_IRQ,
+    SPSR_FIRQ,
+    MODE_REG, /* cpsr[4:0], kept as its own entry to simplify calculations */
+    BANK_REG,
+    EXCLUSIVE_TAG,
+    EXCLUSIVE_STATE,
+    EXCLUSIVE_RESULT,
+
+    MAX_REG_NUM, // Count of the identifiers above, not a register itself
+};
+
+// VFP system registers. Values are sequential from 0, so the final enumerator is their count.
+enum VFPSystemRegister {
+    VFP_FPSID,
+    VFP_FPSCR,
+    VFP_FPEXC,
+    VFP_FPINST,
+    VFP_FPINST2,
+    VFP_MVFR0,
+    VFP_MVFR1,
+
+    // Not an actual register.
+    // All VFP system registers should be defined above this.
+    VFP_SYSTEM_REGISTER_COUNT
+};
+
+enum CP15Register { // CP15 register identifiers, grouped by primary register number (cN)
+    // c0 - Information registers
+    CP15_MAIN_ID,
+    CP15_CACHE_TYPE,
+    CP15_TCM_STATUS,
+    CP15_TLB_TYPE,
+    CP15_CPU_ID,
+    CP15_PROCESSOR_FEATURE_0,
+    CP15_PROCESSOR_FEATURE_1,
+    CP15_DEBUG_FEATURE_0,
+    CP15_AUXILIARY_FEATURE_0,
+    CP15_MEMORY_MODEL_FEATURE_0,
+    CP15_MEMORY_MODEL_FEATURE_1,
+    CP15_MEMORY_MODEL_FEATURE_2,
+    CP15_MEMORY_MODEL_FEATURE_3,
+    CP15_ISA_FEATURE_0,
+    CP15_ISA_FEATURE_1,
+    CP15_ISA_FEATURE_2,
+    CP15_ISA_FEATURE_3,
+    CP15_ISA_FEATURE_4,
+
+    // c1 - Control registers
+    CP15_CONTROL,
+    CP15_AUXILIARY_CONTROL,
+    CP15_COPROCESSOR_ACCESS_CONTROL,
+
+    // c2 - Translation table registers
+    CP15_TRANSLATION_BASE_TABLE_0,
+    CP15_TRANSLATION_BASE_TABLE_1,
+    CP15_TRANSLATION_BASE_CONTROL,
+    CP15_DOMAIN_ACCESS_CONTROL, // Listed here, but reset under the c3 group in ResetMPCoreCP15Registers
+    CP15_RESERVED,
+
+    // c5 - Fault status registers
+    CP15_FAULT_STATUS,
+    CP15_INSTR_FAULT_STATUS,
+    CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS, // Alias: same value as CP15_FAULT_STATUS
+    CP15_INST_FSR, // Numbered from the alias above, so it equals CP15_INSTR_FAULT_STATUS
+
+    // c6 - Fault Address registers
+    CP15_FAULT_ADDRESS,
+    CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS, // Alias: same value as CP15_FAULT_ADDRESS
+    CP15_WFAR,
+    CP15_IFAR,
+
+    // c7 - Cache operation registers
+    CP15_WAIT_FOR_INTERRUPT,
+    CP15_PHYS_ADDRESS,
+    CP15_INVALIDATE_INSTR_CACHE,
+    CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
+    CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
+    CP15_FLUSH_PREFETCH_BUFFER,
+    CP15_FLUSH_BRANCH_TARGET_CACHE,
+    CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
+    CP15_INVALIDATE_DATA_CACHE,
+    CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
+    CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
+    CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
+    CP15_CLEAN_DATA_CACHE,
+    CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
+    CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
+    CP15_DATA_SYNC_BARRIER,
+    CP15_DATA_MEMORY_BARRIER,
+    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
+    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
+    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
+
+    // c8 - TLB operations
+    CP15_INVALIDATE_ITLB,
+    CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
+    CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
+    CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
+    CP15_INVALIDATE_DTLB,
+    CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
+    CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
+    CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
+    CP15_INVALIDATE_UTLB,
+    CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
+    CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
+    CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
+
+    // c9 - Data cache lockdown register
+    CP15_DATA_CACHE_LOCKDOWN,
+
+    // c10 - TLB/Memory map registers
+    CP15_TLB_LOCKDOWN,
+    CP15_PRIMARY_REGION_REMAP,
+    CP15_NORMAL_REGION_REMAP,
+
+    // c13 - Thread related registers
+    CP15_PID,
+    CP15_CONTEXT_ID,
+    CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
+    CP15_THREAD_URO,  // Thread ID register - User Read Only (Privileged R/W)
+    CP15_THREAD_PRW,  // Thread ID register - Privileged R/W only.
+
+    // c15 - Performance and TLB lockdown registers
+    CP15_PERFORMANCE_MONITOR_CONTROL,
+    CP15_CYCLE_COUNTER,
+    CP15_COUNT_0,
+    CP15_COUNT_1,
+    CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
+    CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
+    CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
+    CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
+    CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
+    CP15_TLB_DEBUG_CONTROL,
+
+    // Skyeye defined
+    CP15_TLB_FAULT_ADDR,
+    CP15_TLB_FAULT_STATUS,
+
+    // Not an actual register.
+    // All registers should be defined above this.
+    CP15_REGISTER_COUNT,
+};
diff --git a/tests/skyeye_interpreter/skyeye_common/armstate.cpp b/tests/skyeye_interpreter/skyeye_common/armstate.cpp
new file mode 100644
index 00000000..ad5f1fb6
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/armstate.cpp
@@ -0,0 +1,670 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include "common/logging/log.h"
+#include "tests/skyeye_interpreter/skyeye_common/armstate.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfp.h"
+
+namespace Common { // Byte-order reversal helpers for 16-, 32- and 64-bit values
+inline u16 swap16(u16 data) {return (u16)((data >> 8) | (data << 8));}
+inline u32 swap32(u32 data) {return ((u32)swap16((u16)data) << 16) | swap16((u16)(data >> 16));} // widen first: a promoted int would shift into its sign bit
+inline u64 swap64(u64 data) {return ((u64)swap32((u32)data) << 32) | swap32((u32)(data >> 32));}
+}
+
+ARMul_State::ARMul_State(PrivilegeMode initial_mode)
+{
+    Reset(); // Leaves the core in SVC32MODE
+    ChangePrivilegeMode(initial_mode); // Returns immediately when initial_mode is already the current mode
+}
+
+void ARMul_State::ChangePrivilegeMode(u32 new_mode)
+{
+    // Saves the current mode's banked registers (and SPSR where the mode has one), then loads
+    // those of new_mode. NOTE(review): new_mode is compared against USERBANK - confirm intent.
+    if (Mode == new_mode || new_mode == USERBANK)
+        return;
+
+    switch (Mode) {
+    case SYSTEM32MODE: // Shares registers with user mode
+    case USER32MODE:
+        Reg_usr[0] = Reg[13];
+        Reg_usr[1] = Reg[14];
+        break;
+    case IRQ32MODE:
+        Reg_irq[0] = Reg[13];
+        Reg_irq[1] = Reg[14];
+        Spsr[IRQBANK] = Spsr_copy;
+        break;
+    case SVC32MODE:
+        Reg_svc[0] = Reg[13];
+        Reg_svc[1] = Reg[14];
+        Spsr[SVCBANK] = Spsr_copy;
+        break;
+    case ABORT32MODE:
+        Reg_abort[0] = Reg[13];
+        Reg_abort[1] = Reg[14];
+        Spsr[ABORTBANK] = Spsr_copy;
+        break;
+    case UNDEF32MODE:
+        Reg_undef[0] = Reg[13];
+        Reg_undef[1] = Reg[14];
+        Spsr[UNDEFBANK] = Spsr_copy;
+        break;
+    case FIQ32MODE:
+        std::copy(Reg.begin() + 8, Reg.end() - 1, Reg_firq.begin());
+        Spsr[FIQBANK] = Spsr_copy;
+        break;
+    }
+
+    switch (new_mode) {
+    case USER32MODE:
+        Bank = USERBANK;
+        Reg[13] = Reg_usr[0];
+        Reg[14] = Reg_usr[1];
+        break;
+    case IRQ32MODE:
+        Bank = IRQBANK;
+        Reg[13] = Reg_irq[0];
+        Reg[14] = Reg_irq[1];
+        Spsr_copy = Spsr[IRQBANK];
+        break;
+    case SVC32MODE:
+        Bank = SVCBANK;
+        Reg[13] = Reg_svc[0];
+        Reg[14] = Reg_svc[1];
+        Spsr_copy = Spsr[SVCBANK];
+        break;
+    case ABORT32MODE:
+        Bank = ABORTBANK;
+        Reg[13] = Reg_abort[0];
+        Reg[14] = Reg_abort[1];
+        Spsr_copy = Spsr[ABORTBANK];
+        break;
+    case UNDEF32MODE:
+        Bank = UNDEFBANK;
+        Reg[13] = Reg_undef[0];
+        Reg[14] = Reg_undef[1];
+        Spsr_copy = Spsr[UNDEFBANK];
+        break;
+    case FIQ32MODE:
+        Bank = FIQBANK;
+        std::copy(Reg_firq.begin(), Reg_firq.end(), Reg.begin() + 8);
+        Spsr_copy = Spsr[FIQBANK];
+        break;
+    case SYSTEM32MODE: // Shares registers with user mode.
+        Bank = SYSTEMBANK;
+        Reg[13] = Reg_usr[0];
+        Reg[14] = Reg_usr[1];
+        break;
+    }
+
+    // Replace the old mode bits in the CPSR with the new ones, then record the new mode
+    Cpsr = (Cpsr & ~Mode) | new_mode;
+    Mode = new_mode;
+}
+
+// Puts the core into its reset configuration: supervisor mode, PC at zero.
+void ARMul_State::Reset()
+{
+    VFPInit(this);
+    ResetMPCoreCP15Registers();
+
+    // Supervisor mode and bank, with INTBITS set in the CPSR
+    Mode = SVC32MODE;
+    Bank = SVCBANK;
+    Cpsr = INTBITS | SVC32MODE;
+
+    // Stack pointer at the top of the stack, PC at zero
+    Reg[13] = 0x10000000;
+    Reg[15] = 0;
+
+    abortSig = LOW;
+    NresetSig = HIGH;
+    NfiqSig = HIGH;
+    NirqSig = HIGH;
+    NtransSig = ((Mode & 3) != 0) ? HIGH : LOW;
+
+    Emulate = RUN;
+    NumInstrs = 0;
+}
+
+// Writes the reset values tabulated below into CP15, in table order; other entries are untouched.
+void ARMul_State::ResetMPCoreCP15Registers()
+{
+    static const struct {
+        CP15Register index;
+        u32 value;
+    } reset_values[] = {
+        // c0
+        { CP15_MAIN_ID, 0x410FB024 },
+        { CP15_TLB_TYPE, 0x00000800 },
+        { CP15_PROCESSOR_FEATURE_0, 0x00000111 },
+        { CP15_PROCESSOR_FEATURE_1, 0x00000001 },
+        { CP15_DEBUG_FEATURE_0, 0x00000002 },
+        { CP15_MEMORY_MODEL_FEATURE_0, 0x01100103 },
+        { CP15_MEMORY_MODEL_FEATURE_1, 0x10020302 },
+        { CP15_MEMORY_MODEL_FEATURE_2, 0x01222000 },
+        { CP15_MEMORY_MODEL_FEATURE_3, 0x00000000 },
+        { CP15_ISA_FEATURE_0, 0x00100011 },
+        { CP15_ISA_FEATURE_1, 0x12002111 },
+        { CP15_ISA_FEATURE_2, 0x11221011 },
+        { CP15_ISA_FEATURE_3, 0x01102131 },
+        { CP15_ISA_FEATURE_4, 0x00000141 },
+        // c1
+        { CP15_CONTROL, 0x00054078 },
+        { CP15_AUXILIARY_CONTROL, 0x0000000F },
+        { CP15_COPROCESSOR_ACCESS_CONTROL, 0x00000000 },
+        // c2
+        { CP15_TRANSLATION_BASE_TABLE_0, 0x00000000 },
+        { CP15_TRANSLATION_BASE_TABLE_1, 0x00000000 },
+        { CP15_TRANSLATION_BASE_CONTROL, 0x00000000 },
+        // c3
+        { CP15_DOMAIN_ACCESS_CONTROL, 0x00000000 },
+        // c7
+        { CP15_PHYS_ADDRESS, 0x00000000 },
+        // c9
+        { CP15_DATA_CACHE_LOCKDOWN, 0xFFFFFFF0 },
+        // c10
+        { CP15_TLB_LOCKDOWN, 0x00000000 },
+        { CP15_PRIMARY_REGION_REMAP, 0x00098AA4 },
+        { CP15_NORMAL_REGION_REMAP, 0x44E048E0 },
+        // c13
+        { CP15_PID, 0x00000000 },
+        { CP15_CONTEXT_ID, 0x00000000 },
+        { CP15_THREAD_UPRW, 0x00000000 },
+        { CP15_THREAD_URO, 0x00000000 },
+        { CP15_THREAD_PRW, 0x00000000 },
+        // c15
+        { CP15_PERFORMANCE_MONITOR_CONTROL, 0x00000000 },
+        { CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS, 0x00000000 },
+        { CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS, 0x00000000 },
+        { CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE, 0x00000000 },
+        { CP15_TLB_DEBUG_CONTROL, 0x00000000 },
+    };
+
+    for (const auto& entry : reset_values)
+        CP15[entry.index] = entry.value;
+}
+
+u8 ARMul_State::ReadMemory8(u32 address) const
+{
+    return (*user_callbacks.MemoryRead8)(address); // Forwarded as-is; unlike the wider reads, no byte swap is applied
+}
+
+u16 ARMul_State::ReadMemory16(u32 address) const
+{
+    // Fetch the halfword through the MemoryRead16 callback.
+    const u16 raw = (*user_callbacks.MemoryRead16)(address);
+    // Its bytes are reversed when the core is in big-endian mode.
+    if (InBigEndianMode())
+        return Common::swap16(raw);
+    return raw;
+}
+
+u32 ARMul_State::ReadMemory32(u32 address) const
+{
+    // Fetch the word through the MemoryRead32 callback.
+    const u32 raw = (*user_callbacks.MemoryRead32)(address);
+    // Its bytes are reversed when the core is in big-endian mode.
+    if (InBigEndianMode())
+        return Common::swap32(raw);
+    return raw;
+}
+
+u64 ARMul_State::ReadMemory64(u32 address) const
+{
+    // Fetch the doubleword through the user callback, then byte-swap it
+    // when InBigEndianMode() reports big-endian data accesses.
+    u64 raw = (*user_callbacks.MemoryRead64)(address);
+    if (!InBigEndianMode())
+        return raw;
+    return Common::swap64(raw);
+}
+
+void ARMul_State::WriteMemory8(u32 address, u8 data)
+{
+    (*user_callbacks.MemoryWrite8)(address, data); // forwarded as-is: unlike the wider writes, no endian swap
+}
+
+void ARMul_State::WriteMemory16(u32 address, u16 data)
+{
+    // Byte-swap first when InBigEndianMode() reports big-endian data
+    // accesses, then hand the halfword to the user-supplied callback.
+    const u16 stored = InBigEndianMode() ? Common::swap16(data) : data;
+    (*user_callbacks.MemoryWrite16)(address, stored);
+}
+
+void ARMul_State::WriteMemory32(u32 address, u32 data)
+{
+    // Byte-swap first when InBigEndianMode() reports big-endian data
+    // accesses, then hand the word to the user-supplied callback.
+    const u32 stored = InBigEndianMode() ? Common::swap32(data) : data;
+    (*user_callbacks.MemoryWrite32)(address, stored);
+}
+
+void ARMul_State::WriteMemory64(u32 address, u64 data)
+{
+    // Byte-swap first when InBigEndianMode() reports big-endian data
+    // accesses, then hand the doubleword to the user-supplied callback.
+    const u64 stored = InBigEndianMode() ? Common::swap64(data) : data;
+    (*user_callbacks.MemoryWrite64)(address, stored);
+}
+
+
+// Reads a CP15 register for the MRC instruction. The two user thread registers are readable in
+// any mode, the rest only in a privileged mode. Unmatched encodings log an error and return 0.
+// The hypervisor-extension registers are not implemented, since the 3DS does not have them.
+u32 ARMul_State::ReadCP15Register(u32 crn, u32 opcode_1, u32 crm, u32 opcode_2) const
+{
+    // Unprivileged registers
+    if (crn == 13 && opcode_1 == 0 && crm == 0)
+    {
+        if (opcode_2 == 2)
+            return CP15[CP15_THREAD_UPRW];
+
+        if (opcode_2 == 3)
+            return CP15[CP15_THREAD_URO];
+    }
+
+    if (InAPrivilegedMode())
+    {
+        if (crn == 0 && opcode_1 == 0)
+        {
+            if (crm == 0)
+            {
+                if (opcode_2 == 0)
+                    return CP15[CP15_MAIN_ID];
+
+                if (opcode_2 == 1)
+                    return CP15[CP15_CACHE_TYPE];
+
+                if (opcode_2 == 3)
+                    return CP15[CP15_TLB_TYPE];
+
+                if (opcode_2 == 5)
+                    return CP15[CP15_CPU_ID];
+            }
+            else if (crm == 1)
+            {
+                if (opcode_2 == 0)
+                    return CP15[CP15_PROCESSOR_FEATURE_0];
+
+                if (opcode_2 == 1)
+                    return CP15[CP15_PROCESSOR_FEATURE_1];
+
+                if (opcode_2 == 2)
+                    return CP15[CP15_DEBUG_FEATURE_0];
+
+                if (opcode_2 == 4)
+                    return CP15[CP15_MEMORY_MODEL_FEATURE_0];
+
+                if (opcode_2 == 5)
+                    return CP15[CP15_MEMORY_MODEL_FEATURE_1];
+
+                if (opcode_2 == 6)
+                    return CP15[CP15_MEMORY_MODEL_FEATURE_2];
+
+                if (opcode_2 == 7)
+                    return CP15[CP15_MEMORY_MODEL_FEATURE_3];
+            }
+            else if (crm == 2)
+            {
+                if (opcode_2 == 0)
+                    return CP15[CP15_ISA_FEATURE_0];
+
+                if (opcode_2 == 1)
+                    return CP15[CP15_ISA_FEATURE_1];
+
+                if (opcode_2 == 2)
+                    return CP15[CP15_ISA_FEATURE_2];
+
+                if (opcode_2 == 3)
+                    return CP15[CP15_ISA_FEATURE_3];
+
+                if (opcode_2 == 4)
+                    return CP15[CP15_ISA_FEATURE_4];
+            }
+        }
+
+        if (crn == 1 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                return CP15[CP15_CONTROL];
+
+            if (opcode_2 == 1)
+                return CP15[CP15_AUXILIARY_CONTROL];
+
+            if (opcode_2 == 2)
+                return CP15[CP15_COPROCESSOR_ACCESS_CONTROL];
+        }
+
+        if (crn == 2 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                return CP15[CP15_TRANSLATION_BASE_TABLE_0];
+
+            if (opcode_2 == 1)
+                return CP15[CP15_TRANSLATION_BASE_TABLE_1];
+
+            if (opcode_2 == 2)
+                return CP15[CP15_TRANSLATION_BASE_CONTROL];
+        }
+
+        if (crn == 3 && opcode_1 == 0 && crm == 0 && opcode_2 == 0)
+            return CP15[CP15_DOMAIN_ACCESS_CONTROL];
+
+        if (crn == 5 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                return CP15[CP15_FAULT_STATUS];
+
+            if (opcode_2 == 1)
+                return CP15[CP15_INSTR_FAULT_STATUS];
+        }
+
+        if (crn == 6 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                return CP15[CP15_FAULT_ADDRESS];
+
+            if (opcode_2 == 1)
+                return CP15[CP15_WFAR];
+        }
+
+        if (crn == 7 && opcode_1 == 0 && crm == 4 && opcode_2 == 0)
+            return CP15[CP15_PHYS_ADDRESS];
+
+        if (crn == 9 && opcode_1 == 0 && crm == 0 && opcode_2 == 0)
+            return CP15[CP15_DATA_CACHE_LOCKDOWN];
+
+        if (crn == 10 && opcode_1 == 0)
+        {
+            if (crm == 0 && opcode_2 == 0)
+                return CP15[CP15_TLB_LOCKDOWN];
+
+            if (crm == 2)
+            {
+                if (opcode_2 == 0)
+                    return CP15[CP15_PRIMARY_REGION_REMAP];
+
+                if (opcode_2 == 1)
+                    return CP15[CP15_NORMAL_REGION_REMAP];
+            }
+        }
+
+        if (crn == 13 && opcode_1 == 0 && crm == 0) // opcode_1 checked like the c13 write path
+        {
+            if (opcode_2 == 0)
+                return CP15[CP15_PID];
+
+            if (opcode_2 == 1)
+                return CP15[CP15_CONTEXT_ID];
+
+            if (opcode_2 == 4)
+                return CP15[CP15_THREAD_PRW];
+        }
+
+        if (crn == 15)
+        {
+            if (opcode_1 == 0 && crm == 12)
+            {
+                if (opcode_2 == 0)
+                    return CP15[CP15_PERFORMANCE_MONITOR_CONTROL];
+
+                if (opcode_2 == 1)
+                    return CP15[CP15_CYCLE_COUNTER];
+
+                if (opcode_2 == 2)
+                    return CP15[CP15_COUNT_0];
+
+                if (opcode_2 == 3)
+                    return CP15[CP15_COUNT_1];
+            }
+
+            if (opcode_1 == 5 && opcode_2 == 2)
+            {
+                if (crm == 5)
+                    return CP15[CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS];
+
+                if (crm == 6)
+                    return CP15[CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS];
+
+                if (crm == 7)
+                    return CP15[CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE];
+            }
+
+            if (opcode_1 == 7 && crm == 1 && opcode_2 == 0)
+                return CP15[CP15_TLB_DEBUG_CONTROL];
+        }
+    }
+
+    LOG_ERROR(Core_ARM11, "MRC CRn=%u, CRm=%u, OP1=%u OP2=%u is not implemented. Returning zero.", crn, crm, opcode_1, opcode_2);
+    return 0;
+}
+
+// Writes to the CP15 registers. Used with implementation of the MCR instruction. Recognised
+// encodings only store `value` in their CP15[] slot; unmatched encodings are silently ignored.
+// The hypervisor-extension registers are not implemented, since the 3DS does not have them.
+void ARMul_State::WriteCP15Register(u32 value, u32 crn, u32 opcode_1, u32 crm, u32 opcode_2)
+{
+    if (InAPrivilegedMode())
+    {
+        if (crn == 1 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                CP15[CP15_CONTROL] = value;
+            else if (opcode_2 == 1)
+                CP15[CP15_AUXILIARY_CONTROL] = value;
+            else if (opcode_2 == 2)
+                CP15[CP15_COPROCESSOR_ACCESS_CONTROL] = value;
+        }
+        else if (crn == 2 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                CP15[CP15_TRANSLATION_BASE_TABLE_0] = value;
+            else if (opcode_2 == 1)
+                CP15[CP15_TRANSLATION_BASE_TABLE_1] = value;
+            else if (opcode_2 == 2)
+                CP15[CP15_TRANSLATION_BASE_CONTROL] = value;
+        }
+        else if (crn == 3 && opcode_1 == 0 && crm == 0 && opcode_2 == 0)
+        {
+            CP15[CP15_DOMAIN_ACCESS_CONTROL] = value;
+        }
+        else if (crn == 5 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                CP15[CP15_FAULT_STATUS] = value;
+            else if (opcode_2 == 1)
+                CP15[CP15_INSTR_FAULT_STATUS] = value;
+        }
+        else if (crn == 6 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                CP15[CP15_FAULT_ADDRESS] = value;
+            else if (opcode_2 == 1)
+                CP15[CP15_WFAR] = value;
+        }
+        else if (crn == 7 && opcode_1 == 0)
+        {
+            if (crm == 0 && opcode_2 == 4)
+            {
+                CP15[CP15_WAIT_FOR_INTERRUPT] = value;
+            }
+            else if (crm == 4 && opcode_2 == 0)
+            {
+                // NOTE: Not entirely accurate. This should do permission checks.
+                // TODO: Implement this maybe. Until then this write is a no-op.
+                // CP15[CP15_PHYS_ADDRESS] = Memory::VirtualToPhysicalAddress(value);
+            }
+            else if (crm == 5)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_INVALIDATE_INSTR_CACHE] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_INVALIDATE_INSTR_CACHE_USING_MVA] = value;
+                else if (opcode_2 == 2)
+                    CP15[CP15_INVALIDATE_INSTR_CACHE_USING_INDEX] = value;
+                else if (opcode_2 == 6)
+                    CP15[CP15_FLUSH_BRANCH_TARGET_CACHE] = value;
+                else if (opcode_2 == 7)
+                    CP15[CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY] = value;
+            }
+            else if (crm == 6)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_INVALIDATE_DATA_CACHE] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA] = value;
+                else if (opcode_2 == 2)
+                    CP15[CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX] = value;
+            }
+            else if (crm == 7 && opcode_2 == 0)
+            {
+                CP15[CP15_INVALIDATE_DATA_AND_INSTR_CACHE] = value;
+            }
+            else if (crm == 10)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_CLEAN_DATA_CACHE] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_CLEAN_DATA_CACHE_LINE_USING_MVA] = value;
+                else if (opcode_2 == 2)
+                    CP15[CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX] = value;
+            }
+            else if (crm == 14)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_CLEAN_AND_INVALIDATE_DATA_CACHE] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA] = value;
+                else if (opcode_2 == 2)
+                    CP15[CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX] = value;
+            }
+        }
+        else if (crn == 8 && opcode_1 == 0)
+        {
+            if (crm == 5)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_INVALIDATE_ITLB] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_INVALIDATE_ITLB_SINGLE_ENTRY] = value;
+                else if (opcode_2 == 2)
+                    CP15[CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH] = value;
+                else if (opcode_2 == 3)
+                    CP15[CP15_INVALIDATE_ITLB_ENTRY_ON_MVA] = value;
+            }
+            else if (crm == 6)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_INVALIDATE_DTLB] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_INVALIDATE_DTLB_SINGLE_ENTRY] = value;
+                else if (opcode_2 == 2)
+                    CP15[CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH] = value;
+                else if (opcode_2 == 3)
+                    CP15[CP15_INVALIDATE_DTLB_ENTRY_ON_MVA] = value;
+            }
+            else if (crm == 7)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_INVALIDATE_UTLB] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_INVALIDATE_UTLB_SINGLE_ENTRY] = value;
+                else if (opcode_2 == 2)
+                    CP15[CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH] = value;
+                else if (opcode_2 == 3)
+                    CP15[CP15_INVALIDATE_UTLB_ENTRY_ON_MVA] = value;
+            }
+        }
+        else if (crn == 9 && opcode_1 == 0 && crm == 0 && opcode_2 == 0)
+        {
+            CP15[CP15_DATA_CACHE_LOCKDOWN] = value;
+        }
+        else if (crn == 10 && opcode_1 == 0)
+        {
+            if (crm == 0 && opcode_2 == 0)
+            {
+                CP15[CP15_TLB_LOCKDOWN] = value;
+            }
+            else if (crm == 2)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_PRIMARY_REGION_REMAP] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_NORMAL_REGION_REMAP] = value;
+            }
+        }
+        else if (crn == 13 && opcode_1 == 0 && crm == 0)
+        {
+            if (opcode_2 == 0)
+                CP15[CP15_PID] = value;
+            else if (opcode_2 == 1)
+                CP15[CP15_CONTEXT_ID] = value;
+            else if (opcode_2 == 3)
+                CP15[CP15_THREAD_URO] = value;
+            else if (opcode_2 == 4)
+                CP15[CP15_THREAD_PRW] = value;
+        }
+        else if (crn == 15)
+        {
+            if (opcode_1 == 0 && crm == 12)
+            {
+                if (opcode_2 == 0)
+                    CP15[CP15_PERFORMANCE_MONITOR_CONTROL] = value;
+                else if (opcode_2 == 1)
+                    CP15[CP15_CYCLE_COUNTER] = value;
+                else if (opcode_2 == 2)
+                    CP15[CP15_COUNT_0] = value;
+                else if (opcode_2 == 3)
+                    CP15[CP15_COUNT_1] = value;
+            }
+            else if (opcode_1 == 5)
+            {
+                if (crm == 4)
+                {
+                    if (opcode_2 == 2)
+                        CP15[CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY] = value;
+                    else if (opcode_2 == 4)
+                        CP15[CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY] = value;
+                }
+                else if (crm == 5 && opcode_2 == 2)
+                {
+                    CP15[CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS] = value;
+                }
+                else if (crm == 6 && opcode_2 == 2)
+                {
+                    CP15[CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS] = value;
+                }
+                else if (crm == 7 && opcode_2 == 2)
+                {
+                    CP15[CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE] = value;
+                }
+            }
+            else if (opcode_1 == 7 && crm == 1 && opcode_2 == 0)
+            {
+                CP15[CP15_TLB_DEBUG_CONTROL] = value;
+            }
+        }
+    }
+
+    // Unprivileged registers: checked in every mode, also after the privileged chain above.
+    if (crn == 7 && opcode_1 == 0 && crm == 5 && opcode_2 == 4)
+    {
+        CP15[CP15_FLUSH_PREFETCH_BUFFER] = value;
+    }
+    else if (crn == 7 && opcode_1 == 0 && crm == 10)
+    {
+        if (opcode_2 == 4)
+            CP15[CP15_DATA_SYNC_BARRIER] = value;
+        else if (opcode_2 == 5)
+            CP15[CP15_DATA_MEMORY_BARRIER] = value;
+    }
+    else if (crn == 13 && opcode_1 == 0 && crm == 0 && opcode_2 == 2)
+    {
+        CP15[CP15_THREAD_UPRW] = value;
+    }
+}
diff --git a/tests/skyeye_interpreter/skyeye_common/armstate.h b/tests/skyeye_interpreter/skyeye_common/armstate.h
new file mode 100644
index 00000000..e85eea9f
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/armstate.h
@@ -0,0 +1,255 @@
+/*  armdefs.h -- ARMulator common definitions:  ARM6 Instruction Emulator.
+    Copyright (C) 1994 Advanced RISC Machines Ltd.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#pragma once
+
+#include <array>
+#include <unordered_map>
+
+#include "common/common_types.h"
+#include "interface/interface.h"
+#include "tests/skyeye_interpreter/skyeye_common/arm_regformat.h"
+
+// Signal levels
+enum {
+    LOW     = 0,
+    HIGH    = 1,
+    LOWHIGH = 1, // same value as HIGH
+    HIGHLOW = 2
+};
+
+// Cache types
+enum {
+    NONCACHE  = 0,
+    DATACACHE = 1,
+    INSTCACHE = 2,
+};
+
+// ARM privilege modes (every value fits within the MODEBITS mask, 0x1F)
+enum PrivilegeMode {
+    USER32MODE   = 16,
+    FIQ32MODE    = 17,
+    IRQ32MODE    = 18,
+    SVC32MODE    = 19,
+    ABORT32MODE  = 23,
+    UNDEF32MODE  = 27,
+    SYSTEM32MODE = 31
+};
+
+// ARM privilege mode register banks
+enum {
+    USERBANK   = 0,
+    FIQBANK    = 1,
+    IRQBANK    = 2,
+    SVCBANK    = 3,
+    ABORTBANK  = 4,
+    UNDEFBANK  = 5,
+    DUMMYBANK  = 6,
+    SYSTEMBANK = 7
+};
+
+// Hardware vector addresses
+enum {
+    ARMResetV          = 0,
+    ARMUndefinedInstrV = 4,
+    ARMSWIV            = 8,
+    ARMPrefetchAbortV  = 12,
+    ARMDataAbortV      = 16,
+    ARMAddrExceptnV    = 20,
+    ARMIRQV            = 24,
+    ARMFIQV            = 28,
+    ARMErrorV          = 32, // This is an offset, not an address!
+    // ARMul_-prefixed aliases of the vectors above (ARMErrorV has no alias).
+    ARMul_ResetV          = ARMResetV,
+    ARMul_UndefinedInstrV = ARMUndefinedInstrV,
+    ARMul_SWIV            = ARMSWIV,
+    ARMul_PrefetchAbortV  = ARMPrefetchAbortV,
+    ARMul_DataAbortV      = ARMDataAbortV,
+    ARMul_AddrExceptnV    = ARMAddrExceptnV,
+    ARMul_IRQV            = ARMIRQV,
+    ARMul_FIQV            = ARMFIQV
+};
+
+// Coprocessor status values
+enum {
+    ARMul_FIRST     = 0,
+    ARMul_TRANSFER  = 1,
+    ARMul_BUSY      = 2,
+    ARMul_DATA      = 3,
+    ARMul_INTERRUPT = 4,
+    ARMul_DONE      = 0, // numbering restarts here: same value as ARMul_FIRST
+    ARMul_CANT      = 1, // same value as ARMul_TRANSFER
+    ARMul_INC       = 3  // same value as ARMul_DATA
+};
+
+// Instruction condition codes, numbered 0 (EQ) through 15 (NV)
+enum ConditionCode {
+    EQ = 0,
+    NE = 1,
+    CS = 2,
+    CC = 3,
+    MI = 4,
+    PL = 5,
+    VS = 6,
+    VC = 7,
+    HI = 8,
+    LS = 9,
+    GE = 10,
+    LT = 11,
+    GT = 12,
+    LE = 13,
+    AL = 14,
+    NV = 15,
+};
+
+// Flags for use with the APSR.
+enum : u32 {
+    NBIT = (1U << 31U), // unsigned literal: 1 << 31 does not fit in a signed int
+    ZBIT = (1 << 30),
+    CBIT = (1 << 29),
+    VBIT = (1 << 28),
+    QBIT = (1 << 27),
+    JBIT = (1 << 24),
+    EBIT = (1 << 9),
+    ABIT = (1 << 8),
+    IBIT = (1 << 7),
+    FBIT = (1 << 6),
+    TBIT = (1 << 5),
+
+    // Masks for groups of bits in the APSR.
+    MODEBITS = 0x1F, // the low five bits
+    INTBITS = 0x1C0, // == ABIT | IBIT | FBIT
+};
+
+// Values for Emulate.
+enum {
+    STOP       = 0, // Stop
+    CHANGEMODE = 1, // Change mode
+    ONCE       = 2, // Execute just one iteration
+    RUN        = 3  // Continuous execution
+};
+
+
+struct ARMul_State final // register file, status flags and memory callbacks of one emulated core
+{
+public:
+    explicit ARMul_State(PrivilegeMode initial_mode);
+
+    void ChangePrivilegeMode(u32 new_mode);
+    void Reset();
+
+    // Reads/writes data in big/little endian format based on the
+    // state of the E (endian) bit in the APSR.
+    u8 ReadMemory8(u32 address) const;
+    u16 ReadMemory16(u32 address) const;
+    u32 ReadMemory32(u32 address) const;
+    u64 ReadMemory64(u32 address) const;
+    void WriteMemory8(u32 address, u8 data);
+    void WriteMemory16(u32 address, u16 data);
+    void WriteMemory32(u32 address, u32 data);
+    void WriteMemory64(u32 address, u64 data);
+
+    u32 ReadCP15Register(u32 crn, u32 opcode_1, u32 crm, u32 opcode_2) const;
+    void WriteCP15Register(u32 value, u32 crn, u32 opcode_1, u32 crm, u32 opcode_2);
+
+    // Exclusive memory access functions
+    bool IsExclusiveMemoryAccess(u32 address) const {
+        return exclusive_state && exclusive_tag == (address & RESERVATION_GRANULE_MASK);
+    }
+    void SetExclusiveMemoryAddress(u32 address) {
+        exclusive_tag = address & RESERVATION_GRANULE_MASK;
+        exclusive_state = true;
+    }
+    void UnsetExclusiveMemoryAddress() {
+        exclusive_tag = 0xFFFFFFFF;
+        exclusive_state = false;
+    }
+
+    // Whether or not the given CPU is in big endian mode (E bit is set)
+    bool InBigEndianMode() const {
+        return (Cpsr & EBIT) != 0;
+    }
+    // Whether or not the given CPU is in a mode other than user mode.
+    bool InAPrivilegedMode() const {
+        return (Mode != USER32MODE);
+    }
+    // Note that for the 3DS, a Thumb instruction will only ever be
+    // two bytes in size. Thus we don't need to worry about ThumbEE
+    // or Thumb-2 where instructions can be 4 bytes in length.
+    u32 GetInstructionSize() const {
+        return TFlag ? 2 : 4;
+    }
+
+    std::array<u32, 16> Reg{};      // The current register file
+    std::array<u32, 2> Reg_usr{};
+    std::array<u32, 2> Reg_svc{};   // R13_SVC R14_SVC
+    std::array<u32, 2> Reg_abort{}; // R13_ABORT R14_ABORT
+    std::array<u32, 2> Reg_undef{}; // R13 UNDEF R14 UNDEF
+    std::array<u32, 2> Reg_irq{};   // R13_IRQ R14_IRQ
+    std::array<u32, 7> Reg_firq{};  // R8---R14 FIRQ
+    std::array<u32, 7> Spsr{};      // The exception psr's
+    std::array<u32, CP15_REGISTER_COUNT> CP15{};
+
+    // FPSID, FPSCR, and FPEXC
+    std::array<u32, VFP_SYSTEM_REGISTER_COUNT> VFP{};
+
+    // VFPv2 and VFPv3-D16 have 16 doubleword registers (D0-D15 or S0-S31).
+    // VFPv3-D32/ASIMD may have up to 32 doubleword registers (D0-D31),
+    // and only 32 singleword registers are accessible (S0-S31).
+    std::array<u32, 64> ExtReg{};
+
+    u32 Emulate; // To start and stop emulation
+    u32 Cpsr;    // The current PSR
+    u32 Spsr_copy;
+    u32 phys_pc;
+
+    u32 Mode;          // The current mode
+    u32 Bank;          // The current register bank
+
+    u32 NFlag, ZFlag, CFlag, VFlag, IFFlags; // Dummy flags for speed
+    unsigned int shifter_carry_out;
+
+    u32 TFlag; // Thumb state
+
+    unsigned long long NumInstrs; // The number of instructions executed
+    unsigned NumInstrsToExecute;
+
+    unsigned NresetSig; // Reset the processor
+    unsigned NfiqSig;
+    unsigned NirqSig;
+
+    unsigned abortSig;
+    unsigned NtransSig;
+    unsigned bigendSig;
+    unsigned syscallSig;
+
+    // TODO(bunnei): Move this cache to a better place - it should be per codeset (likely per
+    // process for our purposes), not per ARMul_State (which tracks CPU core state).
+    std::unordered_map<u32, int> instruction_cache;
+
+    void ResetMPCoreCP15Registers();
+
+    // Defines a reservation granule of 2 words, which protects the first 2 words starting at the tag.
+    // This is the smallest granule allowed by the v7 spec, and is coincidentally just large enough to
+    // support LDR/STREXD.
+    static const u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
+
+    // The monitor starts clear (the state UnsetExclusiveMemoryAddress() produces) so that
+    // IsExclusiveMemoryAccess() never reads an indeterminate value.
+    u32 exclusive_tag = 0xFFFFFFFF; // The address for which the local monitor is in exclusive access mode
+    bool exclusive_state = false;
+    Dynarmic::UserCallbacks user_callbacks;
+};
diff --git a/tests/skyeye_interpreter/skyeye_common/armsupp.cpp b/tests/skyeye_interpreter/skyeye_common/armsupp.cpp
new file mode 100644
index 00000000..a1ff2022
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/armsupp.cpp
@@ -0,0 +1,207 @@
+/*  armsupp.c -- ARMulator support code:  ARM6 Instruction Emulator.
+    Copyright (C) 1994 Advanced RISC Machines Ltd.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "common/logging/log.h"
+
+#include "tests/skyeye_interpreter/skyeye_common/arm_regformat.h"
+#include "tests/skyeye_interpreter/skyeye_common/armstate.h"
+#include "tests/skyeye_interpreter/skyeye_common/armsupp.h"
+
+// Absolute difference of two unsigned bytes, i.e. |left - right|.
+u8 ARMul_UnsignedAbsoluteDifference(u8 left, u8 right)
+{
+    const u8 larger = (left > right) ? left : right;
+    const u8 smaller = (left > right) ? right : left;
+
+    return larger - smaller;
+}
+
+// Computes left + right + carry_in modulo 2^32. Each out-pointer may be null; when it is not,
+// it receives whether the unsigned sum carried out of bit 31 / the signed sum overflowed.
+u32 AddWithCarry(u32 left, u32 right, u32 carry_in, bool* carry_out_occurred, bool* overflow_occurred)
+{
+    const u64 wide_unsigned = static_cast<u64>(left) + right + carry_in;
+    const s64 wide_signed = static_cast<s64>(static_cast<s32>(left)) + static_cast<s32>(right) + static_cast<s64>(carry_in);
+    const u32 truncated = static_cast<u32>(wide_unsigned);
+
+    if (carry_out_occurred != nullptr)
+        *carry_out_occurred = (wide_unsigned != truncated);
+    if (overflow_occurred != nullptr)
+        *overflow_occurred = (static_cast<s32>(truncated) != wide_signed);
+
+    return truncated;
+}
+
+// True when a + b = result overflowed as a signed add: equal operand signs, different result sign.
+bool AddOverflow(u32 a, u32 b, u32 result)
+{
+    const u32 sign_flipped = (a ^ result) & (b ^ result);
+    return (sign_flipped >> 31) != 0;
+}
+
+// True when a - b = result overflowed as a signed subtract: operand signs differ, result sign != a's.
+bool SubOverflow(u32 a, u32 b, u32 result)
+{
+    const u32 sign_flipped = (a ^ b) & (a ^ result);
+    return (sign_flipped >> 31) != 0;
+}
+
+// Reports whether a + b overflows as a signed 32-bit addition, i.e. whether Q should be set.
+bool ARMul_AddOverflowQ(u32 a, u32 b)
+{
+    const u32 sign_mask = 0x80000000;
+    const u32 sum = a + b;
+    const bool sum_sign_differs = ((sum ^ a) & sign_mask) != 0;
+    const bool operand_signs_match = ((a ^ b) & sign_mask) == 0;
+    return sum_sign_differs && operand_signs_match;
+}
+
+// Adds two bytes as signed 8-bit values, clamping to 0x80 (-128) or 0x7F (+127)
+// instead of wrapping when the signed sum overflows.
+u8 ARMul_SignedSaturatedAdd8(u8 left, u8 right)
+{
+    const u8 sign_bit = 0x80;
+    const u8 wrapped = static_cast<u8>(left + right);
+    const bool sign_changed = ((wrapped ^ left) & sign_bit) != 0;
+    const bool operand_signs_match = ((left ^ right) & sign_bit) == 0;
+
+    if (!(sign_changed && operand_signs_match))
+        return wrapped;
+
+    return (left & sign_bit) ? 0x80 : 0x7F;
+}
+
+// Subtracts right from left as signed 8-bit values, clamping to 0x80 (-128) or 0x7F (+127)
+// instead of wrapping when the signed difference overflows.
+u8 ARMul_SignedSaturatedSub8(u8 left, u8 right)
+{
+    const u8 sign_bit = 0x80;
+    const u8 wrapped = static_cast<u8>(left - right);
+    const bool sign_changed = ((wrapped ^ left) & sign_bit) != 0;
+    const bool operand_signs_differ = ((left ^ right) & sign_bit) != 0;
+
+    if (!(sign_changed && operand_signs_differ))
+        return wrapped;
+
+    return (left & sign_bit) ? 0x80 : 0x7F;
+}
+
+// Adds two halfwords as signed 16-bit values, clamping to 0x8000 (-32768) or 0x7FFF (+32767)
+// instead of wrapping when the signed sum overflows.
+u16 ARMul_SignedSaturatedAdd16(u16 left, u16 right)
+{
+    const u16 sign_bit = 0x8000;
+    const u16 wrapped = static_cast<u16>(left + right);
+    const bool sign_changed = ((wrapped ^ left) & sign_bit) != 0;
+    const bool operand_signs_match = ((left ^ right) & sign_bit) == 0;
+
+    if (!(sign_changed && operand_signs_match))
+        return wrapped;
+
+    return (left & sign_bit) ? 0x8000 : 0x7FFF;
+}
+
+// Subtracts right from left as signed 16-bit values, clamping to 0x8000 (-32768) or
+// 0x7FFF (+32767) instead of wrapping when the signed difference overflows.
+u16 ARMul_SignedSaturatedSub16(u16 left, u16 right)
+{
+    const u16 sign_bit = 0x8000;
+    const u16 wrapped = static_cast<u16>(left - right);
+    const bool sign_changed = ((wrapped ^ left) & sign_bit) != 0;
+    const bool operand_signs_differ = ((left ^ right) & sign_bit) != 0;
+
+    if (!(sign_changed && operand_signs_differ))
+        return wrapped;
+
+    return (left & sign_bit) ? 0x8000 : 0x7FFF;
+}
+
+// Adds two unsigned bytes, clamping the result to 0xFF instead of
+// wrapping around when the true sum does not fit in 8 bits.
+u8 ARMul_UnsignedSaturatedAdd8(u8 left, u8 right)
+{
+    const u32 total = static_cast<u32>(left) + right;
+    if (total > 0xFF)
+        return 0xFF;
+
+    return static_cast<u8>(total);
+}
+
+// Adds two unsigned halfwords, clamping the result to 0xFFFF instead of
+// wrapping around when the true sum does not fit in 16 bits.
+u16 ARMul_UnsignedSaturatedAdd16(u16 left, u16 right)
+{
+    const u32 total = static_cast<u32>(left) + right;
+    if (total > 0xFFFF)
+        return 0xFFFF;
+
+    return static_cast<u16>(total);
+}
+
+// Subtracts right from left as unsigned bytes, clamping at 0 rather than
+// wrapping around when right is not smaller than left.
+u8 ARMul_UnsignedSaturatedSub8(u8 left, u8 right)
+{
+    const bool clamps_to_zero = (right >= left);
+
+    return clamps_to_zero ? 0 : (left - right);
+}
+
+// Subtracts right from left as unsigned halfwords, clamping at 0 rather than
+// wrapping around when right is not smaller than left.
+u16 ARMul_UnsignedSaturatedSub16(u16 left, u16 right)
+{
+    const bool clamps_to_zero = (right >= left);
+
+    return clamps_to_zero ? 0 : (left - right);
+}
+
+// Saturates value to the signed range [~max, max] where max = 2^shift - 1, reporting through
+// *saturation_occurred whether it clamped. 1U keeps (1 << 31) - 1 from overflowing a signed int.
+u32 ARMul_SignedSatQ(s32 value, u8 shift, bool* saturation_occurred)
+{
+    const u32 max = (1U << shift) - 1;
+    const s32 top = (value >> shift);
+
+    if (top > 0) {
+        *saturation_occurred = true;
+        return max;
+    } else if (top < -1) {
+        *saturation_occurred = true;
+        return ~max;
+    }
+
+    *saturation_occurred = false;
+    return (u32)value;
+}
+
+// Saturates value to the unsigned range [0, max] where max = 2^shift - 1, reporting through
+// *saturation_occurred whether it clamped. 1U keeps (1 << 31) - 1 from overflowing a signed int.
+u32 ARMul_UnsignedSatQ(s32 value, u8 shift, bool* saturation_occurred)
+{
+    const u32 max = (1U << shift) - 1;
+    if (value < 0) {
+        *saturation_occurred = true;
+        return 0;
+    } else if ((u32)value > max) {
+        *saturation_occurred = true;
+        return max;
+    }
+
+    *saturation_occurred = false;
+    return (u32)value;
+}
diff --git a/tests/skyeye_interpreter/skyeye_common/armsupp.h b/tests/skyeye_interpreter/skyeye_common/armsupp.h
new file mode 100644
index 00000000..391309fa
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/armsupp.h
@@ -0,0 +1,32 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+#define BITS(s, a, b) (((s) << ((sizeof(s) * 8 - 1) - (b))) >> (sizeof(s) * 8 - (b) + (a) - 1)) // bits a..b of an unsigned s
+#define BIT(s, n) (((s) >> (n)) & 1) // bit n of s
+// Every argument is parenthesized so expression arguments (e.g. BIT(x | y, n + 1)) expand as written.
+#define POS(i) ( (~(i)) >> 31 )
+#define NEG(i) ( (i) >> 31 )
+
+bool AddOverflow(u32 a, u32 b, u32 result);
+bool SubOverflow(u32 a, u32 b, u32 result);
+
+u32 AddWithCarry(u32 left, u32 right, u32 carry_in, bool* carry_out_occurred, bool* overflow_occurred);
+bool ARMul_AddOverflowQ(u32 a, u32 b);
+
+u8 ARMul_SignedSaturatedAdd8(u8 left, u8 right);
+u8 ARMul_SignedSaturatedSub8(u8 left, u8 right);
+u16 ARMul_SignedSaturatedAdd16(u16 left, u16 right);
+u16 ARMul_SignedSaturatedSub16(u16 left, u16 right);
+
+u8 ARMul_UnsignedSaturatedAdd8(u8 left, u8 right);
+u16 ARMul_UnsignedSaturatedAdd16(u16 left, u16 right);
+u8 ARMul_UnsignedSaturatedSub8(u8 left, u8 right);
+u16 ARMul_UnsignedSaturatedSub16(u16 left, u16 right);
+u8 ARMul_UnsignedAbsoluteDifference(u8 left, u8 right);
+u32 ARMul_SignedSatQ(s32 value, u8 shift, bool* saturation_occurred);
+u32 ARMul_UnsignedSatQ(s32 value, u8 shift, bool* saturation_occurred);
diff --git a/tests/skyeye_interpreter/skyeye_common/vfp/asm_vfp.h b/tests/skyeye_interpreter/skyeye_common/vfp/asm_vfp.h
new file mode 100644
index 00000000..1187924f
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/vfp/asm_vfp.h
@@ -0,0 +1,83 @@
+/*
+ * arch/arm/include/asm/vfp.h
+ *
+ * VFP register definitions.
+ * First, the standard VFP set.
+ */
+
+#pragma once
+
+// ARM11 MPCore FPSID Information
+// These are unshifted field values, not flags: VFPInit() shifts each one into place in FPSID.
+enum : u32 {
+    VFP_FPSID_IMPLMEN  = 0x41, // Implementation code (shifted left by 24). Should be the same as cp15 0 c0 0
+    VFP_FPSID_SW       = 0,    // Software emulation bit value (shifted left by 23)
+    VFP_FPSID_SUBARCH  = 0x1,  // Subarchitecture version number (shifted left by 16)
+    VFP_FPSID_PARTNUM  = 0x20, // Part number (shifted left by 8)
+    VFP_FPSID_VARIANT  = 0xB,  // Variant number (shifted left by 4)
+    VFP_FPSID_REVISION = 0x4   // Revision number (not shifted)
+};
+
+// FPEXC bits
+enum : u32 {
+    FPEXC_EX          = (1U << 31U), // unsigned literal: 1 << 31 does not fit in a signed int
+    FPEXC_EN          = (1 << 30),
+    FPEXC_DEX         = (1 << 29),
+    FPEXC_FP2V        = (1 << 28),
+    FPEXC_VV          = (1 << 27),
+    FPEXC_TFV         = (1 << 26),
+    FPEXC_LENGTH_BIT  = (8),                     // a bit offset, not a mask
+    FPEXC_LENGTH_MASK = (7 << FPEXC_LENGTH_BIT), // three-bit field starting at FPEXC_LENGTH_BIT
+    FPEXC_IDF         = (1 << 7),
+    FPEXC_IXF         = (1 << 4),
+    FPEXC_UFF         = (1 << 3),
+    FPEXC_OFF         = (1 << 2),
+    FPEXC_DZF         = (1 << 1),
+    FPEXC_IOF         = (1 << 0),
+    FPEXC_TRAP_MASK   = (FPEXC_IDF|FPEXC_IXF|FPEXC_UFF|FPEXC_OFF|FPEXC_DZF|FPEXC_IOF) // the six flag bits above
+};
+
+// FPSCR Flags
+enum : u32 {
+    FPSCR_NFLAG         = (1U << 31U), // Negative condition flag
+    FPSCR_ZFLAG         = (1 << 30),   // Zero condition flag
+    FPSCR_CFLAG         = (1 << 29),   // Carry condition flag
+    FPSCR_VFLAG         = (1 << 28),   // Overflow condition flag
+
+    FPSCR_QC            = (1 << 27),   // Cumulative saturation bit
+    FPSCR_AHP           = (1 << 26),   // Alternative half-precision control bit
+    FPSCR_DEFAULT_NAN   = (1 << 25),   // Default NaN mode control bit
+    FPSCR_FLUSH_TO_ZERO = (1 << 24),   // Flush-to-zero mode control bit
+    FPSCR_RMODE_MASK    = (3 << 22),   // Rounding Mode bit mask
+    FPSCR_STRIDE_MASK   = (3 << 20),   // Vector stride bit mask
+    FPSCR_LENGTH_MASK   = (7 << 16),   // Vector length bit mask
+
+    FPSCR_IDE           = (1 << 15),   // Input Denormal exception trap enable
+    FPSCR_IXE           = (1 << 12),   // Inexact exception trap enable
+    FPSCR_UFE           = (1 << 11),   // Underflow exception trap enable
+    FPSCR_OFE           = (1 << 10),   // Overflow exception trap enable
+    FPSCR_DZE           = (1 << 9),    // Division by Zero exception trap enable
+    FPSCR_IOE           = (1 << 8),    // Invalid Operation exception trap enable
+
+    FPSCR_IDC           = (1 << 7),    // Input Denormal cumulative exception bit
+    FPSCR_IXC           = (1 << 4),    // Inexact cumulative exception bit
+    FPSCR_UFC           = (1 << 3),    // Underflow cumulative exception bit
+    FPSCR_OFC           = (1 << 2),    // Overflow cumulative exception bit
+    FPSCR_DZC           = (1 << 1),    // Division by Zero cumulative exception bit
+    FPSCR_IOC           = (1 << 0),    // Invalid Operation cumulative exception bit
+};
+
+// FPSCR bit offsets (the shift counts of FPSCR_RMODE_MASK, FPSCR_STRIDE_MASK and FPSCR_LENGTH_MASK)
+enum : u32 {
+    FPSCR_RMODE_BIT  = 22,
+    FPSCR_STRIDE_BIT = 20,
+    FPSCR_LENGTH_BIT = 16,
+};
+
+// FPSCR rounding modes, already shifted into the FPSCR_RMODE_MASK field (bits 23:22)
+enum : u32 {
+    FPSCR_ROUND_NEAREST  = (0 << 22),
+    FPSCR_ROUND_PLUSINF  = (1 << 22),
+    FPSCR_ROUND_MINUSINF = (2 << 22),
+    FPSCR_ROUND_TOZERO   = (3 << 22)
+};
diff --git a/tests/skyeye_interpreter/skyeye_common/vfp/vfp.cpp b/tests/skyeye_interpreter/skyeye_common/vfp/vfp.cpp
new file mode 100644
index 00000000..d804ea0a
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/vfp/vfp.cpp
@@ -0,0 +1,162 @@
+/*
+    armvfp.c - ARM VFPv3 emulation unit
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* Note: this file handles interface with arm core and vfp registers */
+
+#include "common/common_types.h"
+#include "common/logging/log.h"
+
+#include "tests/skyeye_interpreter/skyeye_common/armstate.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/asm_vfp.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfp.h"
+
+void VFPInit(ARMul_State* state)
+{
+    auto& regs = state->VFP;
+    regs[VFP_FPSID] = (VFP_FPSID_IMPLMEN << 24) | (VFP_FPSID_SW << 23) |
+                      (VFP_FPSID_SUBARCH << 16) | (VFP_FPSID_PARTNUM << 8) |
+                      (VFP_FPSID_VARIANT << 4) | VFP_FPSID_REVISION;
+    regs[VFP_FPEXC] = 0;
+    regs[VFP_FPSCR] = 0;
+    // ARM11 MPCore instruction register reset values.
+    regs[VFP_FPINST]  = 0xEE000A00;
+    regs[VFP_FPINST2] = 0;
+    // ARM11 MPCore feature register values.
+    regs[VFP_MVFR0] = 0x11111111;
+    regs[VFP_MVFR1] = 0;
+}
+
+// Moves one 32-bit word between *value and state->ExtReg[n].
+// to_arm != 0 reads the register into *value; to_arm == 0 writes *value to it.
+// The t argument is not used by this function.
+void VMOVBRS(ARMul_State* state, u32 to_arm, u32 t, u32 n, u32* value)
+{
+    if (!to_arm) {
+        state->ExtReg[n] = *value;
+        return;
+    }
+    *value = state->ExtReg[n];
+}
+
+// Moves two words between *value1/*value2 and ExtReg[n*2]/ExtReg[n*2+1] respectively.
+// to_arm != 0 reads the registers, otherwise they are written; t and t2 are unused.
+void VMOVBRRD(ARMul_State* state, u32 to_arm, u32 t, u32 t2, u32 n, u32* value1, u32* value2)
+{
+    const u32 lo = n * 2, hi = n * 2 + 1;
+    if (to_arm) {
+        *value2 = state->ExtReg[hi];
+        *value1 = state->ExtReg[lo];
+    } else {
+        state->ExtReg[hi] = *value2;
+        state->ExtReg[lo] = *value1;
+    }
+}
+// Moves two words between *value1/*value2 and ExtReg[n]/ExtReg[n+1] respectively.
+// to_arm != 0 reads the registers, otherwise they are written; t and t2 are unused.
+void VMOVBRRSS(ARMul_State* state, u32 to_arm, u32 t, u32 t2, u32 n, u32* value1, u32* value2)
+{
+    const u32 first = n, second = n + 1;
+    if (to_arm) {
+        *value1 = state->ExtReg[first];
+        *value2 = state->ExtReg[second];
+    } else {
+        state->ExtReg[first] = *value1;
+        state->ExtReg[second] = *value2;
+    }
+}
+
+// Stores an immediate: for single, ExtReg[d] = imm; otherwise ExtReg[d*2+1]
+// receives imm and ExtReg[d*2] is cleared.
+void VMOVI(ARMul_State* state, u32 single, u32 d, u32 imm)
+{
+    if (single) {
+        state->ExtReg[d] = imm;
+        return;
+    }
+
+    /* Check endian please */
+    state->ExtReg[d*2+1] = imm;
+    state->ExtReg[d*2] = 0;
+}
+// Copies register m to register d: one word (ExtReg[m] -> ExtReg[d]) for single,
+// otherwise both words of the pair (ExtReg[m*2+1], then ExtReg[m*2]).
+void VMOVR(ARMul_State* state, u32 single, u32 d, u32 m)
+{
+    if (single) {
+        state->ExtReg[d] = state->ExtReg[m];
+        return;
+    }
+
+    /* Check endian please */
+    state->ExtReg[d*2+1] = state->ExtReg[m*2+1];
+    state->ExtReg[d*2] = state->ExtReg[m*2];
+}
+
+/* Miscellaneous functions */
+// Traces the access, then returns the raw word stored in state->ExtReg[reg].
+s32 vfp_get_float(ARMul_State* state, unsigned int reg) {
+    LOG_TRACE(Core_ARM11, "VFP get float: s%d=[%08x]", reg, state->ExtReg[reg]);
+    return static_cast<s32>(state->ExtReg[reg]);
+}
+
+// Traces the access, then stores val as the raw word of state->ExtReg[reg].
+void vfp_put_float(ARMul_State* state, s32 val, unsigned int reg) {
+    LOG_TRACE(Core_ARM11, "VFP put float: s%d <= [%08x]", reg, val);
+    state->ExtReg[reg] = val;
+}
+
+// Returns ExtReg[reg*2+1] as the upper 32 bits joined with ExtReg[reg*2] as the lower 32 bits.
+u64 vfp_get_double(ARMul_State* state, unsigned int reg) {
+    u64 result = (static_cast<u64>(state->ExtReg[reg*2+1]) << 32) | state->ExtReg[reg*2];
+    LOG_TRACE(Core_ARM11, "VFP get double: s[%d-%d]=[%016llx]", reg * 2 + 1, reg * 2, result);
+    return result;
+}
+
+// Splits val into 32-bit halves: the low half goes to ExtReg[reg*2], the high half to ExtReg[reg*2+1].
+void vfp_put_double(ARMul_State* state, u64 val, unsigned int reg) {
+    LOG_TRACE(Core_ARM11, "VFP put double: s[%d-%d] <= [%08x-%08x]", reg * 2 + 1, reg * 2, (u32)(val >> 32), (u32)(val & 0xffffffff));
+    state->ExtReg[reg*2] = static_cast<u32>(val & 0xffffffff);
+    state->ExtReg[reg*2+1] = static_cast<u32>(val >> 32);
+}
+
+/*
+ * Process bitmask of exception conditions. (from vfpmodule.c)
+ */
+void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpscr)
+{
+    const u32 status_flags = FPSCR_NFLAG | FPSCR_ZFLAG | FPSCR_CFLAG | FPSCR_VFLAG;
+    LOG_TRACE(Core_ARM11, "VFP: raising exceptions %08x", exceptions);
+
+    // An untouched exception mask means no handler accepted the instruction.
+    if (exceptions == VFP_EXCEPTION_ERROR) {
+        LOG_CRITICAL(Core_ARM11, "unhandled bounce %x", inst);
+        exit(-1);
+    }
+
+    /*
+     * If any of the status flags are set, replace the old N/Z/C/V flags
+     * rather than accumulating them.  Comparison instructions always
+     * return at least one of these flags set.
+     */
+    if ((exceptions & status_flags) != 0)
+        fpscr &= ~status_flags;
+
+    state->VFP[VFP_FPSCR] = fpscr | exceptions;
+}
diff --git a/tests/skyeye_interpreter/skyeye_common/vfp/vfp.h b/tests/skyeye_interpreter/skyeye_common/vfp/vfp.h
new file mode 100644
index 00000000..6783f89c
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/vfp/vfp.h
@@ -0,0 +1,43 @@
+/*
+    vfp/vfp.h - ARM VFPv3 emulation unit - vfp interface
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#pragma once
+
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfp_helper.h" /* for references to cdp SoftFloat functions */
+
+#define VFP_DEBUG_UNTESTED(x) LOG_TRACE(Core_ARM11, "in func %s, " #x " untested", __FUNCTION__); /* expansion already ends in ';' */
+#define CHECK_VFP_ENABLED /* expands to nothing */
+#define CHECK_VFP_CDP_RET vfp_raise_exceptions(cpu, ret, inst_cream->instr, cpu->VFP[VFP_FPSCR]); /* needs cpu, ret and inst_cream in scope at the use site */
+
+void VFPInit(ARMul_State* state);   // loads the reset values of the VFP system registers
+
+s32 vfp_get_float(ARMul_State* state, u32 reg);
+void vfp_put_float(ARMul_State* state, s32 val, u32 reg);
+u64 vfp_get_double(ARMul_State* state, u32 reg);
+void vfp_put_double(ARMul_State* state, u64 val, u32 reg);
+void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpscr);
+u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr);
+u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr);
+
+void VMOVBRS(ARMul_State* state, u32 to_arm, u32 t, u32 n, u32* value);
+void VMOVBRRD(ARMul_State* state, u32 to_arm, u32 t, u32 t2, u32 n, u32* value1, u32* value2);
+void VMOVBRRSS(ARMul_State* state, u32 to_arm, u32 t, u32 t2, u32 n, u32* value1, u32* value2);
+void VMOVI(ARMul_State* state, u32 single, u32 d, u32 imm);
+void VMOVR(ARMul_State* state, u32 single, u32 d, u32 m);   // m is the source register index, as in the definition
diff --git a/tests/skyeye_interpreter/skyeye_common/vfp/vfp_helper.h b/tests/skyeye_interpreter/skyeye_common/vfp/vfp_helper.h
new file mode 100644
index 00000000..f31dffed
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/vfp/vfp_helper.h
@@ -0,0 +1,450 @@
+/*
+    vfp/vfp.h - ARM VFPv3 emulation unit - SoftFloat lib helper
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/*
+ *  The following code is derivative from Linux Android kernel vfp
+ *  floating point support.
+ *
+ *  Copyright (C) 2004 ARM Limited.
+ *  Written by Deep Blue Solutions Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#pragma once
+
+#include <cstdio>
+#include "common/common_types.h"
+#include "tests/skyeye_interpreter/skyeye_common/armstate.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/asm_vfp.h"
+
+#define do_div(n, base) {(n) /= (base);}   /* in-place division: n = n / base */
+
+enum : u32 {
+    FOP_MASK  = 0x00b00040,   // instruction bits 23, 21, 20 and 6: the bits the FOP_* codes below are made of
+    FOP_FMAC  = 0x00000000,
+    FOP_FNMAC = 0x00000040,
+    FOP_FMSC  = 0x00100000,
+    FOP_FNMSC = 0x00100040,
+    FOP_FMUL  = 0x00200000,
+    FOP_FNMUL = 0x00200040,
+    FOP_FADD  = 0x00300000,
+    FOP_FSUB  = 0x00300040,
+    FOP_FDIV  = 0x00800000,
+    FOP_EXT   = 0x00b00040    // same value as FOP_MASK (every selector bit set)
+};
+
+#define FOP_TO_IDX(inst) (((inst) & 0x00b00000) >> 20 | ((inst) & (1 << 6)) >> 4)   /* argument parenthesized so expressions expand safely */
+
+enum : u32 {
+    FEXT_MASK   = 0x000f0080,   // instruction bits 19:16 and 7: the bits the FEXT_* codes below are made of
+    FEXT_FCPY   = 0x00000000,
+    FEXT_FABS   = 0x00000080,
+    FEXT_FNEG   = 0x00010000,
+    FEXT_FSQRT  = 0x00010080,
+    FEXT_FCMP   = 0x00040000,
+    FEXT_FCMPE  = 0x00040080,
+    FEXT_FCMPZ  = 0x00050000,
+    FEXT_FCMPEZ = 0x00050080,
+    FEXT_FCVT   = 0x00070080,
+    FEXT_FUITO  = 0x00080000,
+    FEXT_FSITO  = 0x00080080,
+    FEXT_FTOUI  = 0x000c0000,
+    FEXT_FTOUIZ = 0x000c0080,
+    FEXT_FTOSI  = 0x000d0000,
+    FEXT_FTOSIZ = 0x000d0080
+};
+
+#define FEXT_TO_IDX(inst) (((inst) & 0x000f0000) >> 15 | ((inst) & (1 << 7)) >> 7)   /* argument parenthesized so expressions expand safely */
+
+#define vfp_get_sd(inst)  (((inst) & 0x0000f000) >> 11 | ((inst) & (1 << 22)) >> 22)   /* bits 15:12 -> 4:1, bit 22 -> 0 */
+#define vfp_get_dd(inst)  (((inst) & 0x0000f000) >> 12 | ((inst) & (1 << 22)) >> 18)   /* bits 15:12 -> 3:0, bit 22 -> 4 */
+#define vfp_get_sm(inst)  (((inst) & 0x0000000f) << 1 | ((inst) & (1 << 5)) >> 5)      /* bits 3:0 -> 4:1, bit 5 -> 0 */
+#define vfp_get_dm(inst)  (((inst) & 0x0000000f) | ((inst) & (1 << 5)) >> 1)           /* bits 3:0 -> 3:0, bit 5 -> 4 */
+#define vfp_get_sn(inst)  (((inst) & 0x000f0000) >> 15 | ((inst) & (1 << 7)) >> 7)     /* bits 19:16 -> 4:1, bit 7 -> 0 */
+#define vfp_get_dn(inst)  (((inst) & 0x000f0000) >> 16 | ((inst) & (1 << 7)) >> 3)     /* bits 19:16 -> 3:0, bit 7 -> 4 */
+// True when bits 11:8 of inst equal 0xa.
+#define vfp_single(inst)  (((inst) & 0x0000f00) == 0xa00)
+
+// Shift val right by `shift` bits, OR-ing a 1 into bit 0 if any 1 bits were shifted out.
+inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
+{
+    if (shift == 0)
+        return val;
+    if (shift >= 32)
+        return val != 0;
+    const u32 lost_bits = val << (32 - shift);
+    return (val >> shift) | (lost_bits != 0);
+}
+
+// Shift val right by `shift` bits, OR-ing a 1 into bit 0 if any 1 bits were shifted out.
+inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
+{
+    if (shift == 0)
+        return val;
+    if (shift >= 64)
+        return val != 0;
+    const u64 lost_bits = val << (64 - shift);
+    return (val >> shift) | (lost_bits != 0);
+}
+
+// Returns the upper 32 bits of val, with bit 0 forced to 1 when any of the
+// lower 32 bits is set.
+inline u32 vfp_hi64to32jamming(u64 val)
+{
+    const u32 upper = static_cast<u32>(val >> 32);
+    const u32 lower = static_cast<u32>(val & 0xffffffff);
+
+    // For an unsigned value, "lower >= 1" is the same test as "lower != 0".
+    if (lower != 0)
+        return upper | 1;
+
+    return upper;
+}
+
+// 128-bit add: (*resh:*resl) = (nh:nl) + (mh:ml); a wrapped low word carries into the high word.
+inline void add128(u64* resh, u64* resl, u64 nh, u64 nl, u64 mh, u64 ml)
+{
+    *resl = nl + ml;
+    *resh = nh + mh;
+    *resh += (*resl < nl) ? 1 : 0;
+}
+
+// 128-bit subtract: (*resh:*resl) = (nh:nl) - (mh:ml); a wrapped low word borrows from the high word.
+inline void sub128(u64* resh, u64* resl, u64 nh, u64 nl, u64 mh, u64 ml)
+{
+    *resl = nl - ml;
+    *resh = nh - mh;
+    *resh -= (*resl > nl) ? 1 : 0;
+}
+
+// Full 64x64 -> 128-bit unsigned multiply: (*resh:*resl) = n * m, built from
+// the four 32x32 partial products of the operands' halves.
+inline void mul64to128(u64* resh, u64* resl, u64 n, u64 m)
+{
+    const u64 n_lo = static_cast<u32>(n);
+    const u64 n_hi = static_cast<u32>(n >> 32);
+    const u64 m_lo = static_cast<u32>(m);
+    const u64 m_hi = static_cast<u32>(m >> 32);
+    u64 low = n_lo * m_lo;
+    u64 high = n_hi * m_hi;
+
+    // Sum of the two cross products; it can wrap, which is detected below.
+    const u64 cross_b = n_lo * m_hi;
+    u64 cross = n_hi * m_lo + cross_b;
+    const u64 cross_carry = (cross < cross_b) ? 1 : 0;
+
+    // The cross sum is weighted by 2^32: its upper half (and carry) go to high.
+    high += (cross_carry << 32) + (cross >> 32);
+
+    // Its lower half goes to low; a wrap there carries one more into high.
+    cross <<= 32;
+    low += cross;
+    high += (low < cross) ? 1 : 0;
+    *resl = low;
+    *resh = high;
+}
+
+// (*resh:*resl) = n << 1 as a 128-bit value: bit 63 of n becomes bit 0 of *resh.
+inline void shift64left(u64* resh, u64* resl, u64 n) {
+    *resh = n >> 63;
+    *resl = n << 1;
+}
+
+// Upper 64 bits of the 128-bit product n * m, with bit 0 set when the lower 64 bits are non-zero.
+inline u64 vfp_hi64multiply64(u64 n, u64 m) {
+    u64 upper, lower;
+    mul64to128(&upper, &lower, n, m);
+    return (lower != 0) ? (upper | 1) : upper;
+}
+
+inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m)
+{
+    u64 mh, ml, remh, reml, termh, terml, z;
+
+    if (nh >= m)                    // guard: returns all-ones instead of dividing
+        return ~0ULL;
+    mh = m >> 32;                   // upper 32 bits of the divisor
+    if (mh << 32 <= nh) {
+        z = 0xffffffff00000000ULL;
+    } else {
+        z = nh;
+        do_div(z, mh);              // z = nh / mh, then moved into the upper half
+        z <<= 32;
+    }
+    mul64to128(&termh, &terml, m, z);
+    sub128(&remh, &reml, nh, nl, termh, terml);   // remainder = (nh:nl) - m * z
+    ml = m << 32;
+    while ((s64)remh < 0) {         // step z down while the remainder is negative
+        z -= 0x100000000ULL;
+        add128(&remh, &reml, remh, reml, mh, ml);
+    }
+    remh = (remh << 32) | (reml >> 32);
+    if (mh << 32 <= remh) {
+        z |= 0xffffffff;
+    } else {
+        do_div(remh, mh);           // lower half of z comes from remh / mh
+        z |= remh;
+    }
+    return z;
+}
+
+// Operations on unpacked elements
+#define vfp_sign_negate(sign) ((sign) ^ 0x8000)   /* flips the unpacked sign bit; argument parenthesized for safe expansion */
+
+// Single-precision value in unpacked form (see vfp_single_unpack / vfp_single_pack)
+struct vfp_single {
+    s16	exponent;       // exponent field as extracted by vfp_single_packed_exponent
+    u16	sign;           // 0x8000 when the packed sign bit is set, otherwise 0
+    u32	significand;    // mantissa shifted up so its leading bit sits at bit 30
+};
+
+// VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa
+// VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent
+// VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand
+// which are not propagated to the float upon packing.
+#define VFP_SINGLE_MANTISSA_BITS (23)
+#define VFP_SINGLE_EXPONENT_BITS (8)
+#define VFP_SINGLE_LOW_BITS      (32 - VFP_SINGLE_MANTISSA_BITS - 2)   /* evaluates to 7 */
+#define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1)      /* evaluates to 0x7f */
+
+// The bit in an unpacked float which indicates that it is a quiet NaN
+#define VFP_SINGLE_SIGNIFICAND_QNAN	(1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS))
+
+// Operations on packed single-precision numbers
+#define vfp_single_packed_sign(v)     ((v) & 0x80000000)
+#define vfp_single_packed_negate(v)   ((v) ^ 0x80000000)
+#define vfp_single_packed_abs(v)      ((v) & ~0x80000000)
+#define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
+#define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
+
+enum : u32 {
+    VFP_NUMBER     = (1 << 0),   // starting classification in the *_type helpers
+    VFP_ZERO       = (1 << 1),   // OR-ed onto VFP_NUMBER by the *_type helpers
+    VFP_DENORMAL   = (1 << 2),   // OR-ed onto VFP_NUMBER by the *_type helpers
+    VFP_INFINITY   = (1 << 3),
+    VFP_NAN        = (1 << 4),
+    VFP_NAN_SIGNAL = (1 << 5),   // extra bit that distinguishes VFP_SNAN from VFP_QNAN
+
+    VFP_QNAN       = (VFP_NAN),
+    VFP_SNAN       = (VFP_NAN|VFP_NAN_SIGNAL)
+};
+
+// Classifies an unpacked single: VFP_INFINITY, VFP_QNAN or VFP_SNAN when the
+// exponent is 255; otherwise VFP_NUMBER, OR-ed with VFP_ZERO or VFP_DENORMAL
+// when the exponent is 0.
+inline int vfp_single_type(const vfp_single* s)
+{
+    const u32 sig = s->significand;
+
+    if (s->exponent == 255) {
+        if (sig == 0)
+            return VFP_INFINITY;
+        return (sig & VFP_SINGLE_SIGNIFICAND_QNAN) ? VFP_QNAN : VFP_SNAN;
+    }
+
+    if (s->exponent != 0)
+        return VFP_NUMBER;
+
+    return VFP_NUMBER | (sig == 0 ? VFP_ZERO : VFP_DENORMAL);
+}
+
+// Unpack a single-precision float into sign/exponent/significand.  The
+// significand is the mantissa magnitude, with the implicit leading 1 added
+// for normal numbers, aligned to bit 30.  May set FPSCR_IDC in *fpscr.
+inline void vfp_single_unpack(vfp_single* s, s32 val, u32* fpscr)
+{
+    s->sign = vfp_single_packed_sign(val) >> 16;
+    s->exponent = vfp_single_packed_exponent(val);
+
+    u32 significand = ((u32)val << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
+    if (s->exponent && s->exponent != 255)
+        significand |= 0x40000000;
+    s->significand = significand;
+
+    // If flush-to-zero mode is enabled, turn the denormal into zero.
+    // On a VFPv2 architecture, the sign of the zero is always positive.
+    if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_single_type(s) & VFP_DENORMAL) != 0) {
+        s->sign = 0;
+        s->exponent = 0;
+        s->significand = 0;
+        *fpscr |= FPSCR_IDC;
+    }
+}
+
+// Re-pack a single-precision float. This assumes that the float is already normalised
+// such that the MSB is bit 30, _not_ bit 31. Fields are widened to u32 before shifting.
+inline s32 vfp_single_pack(const vfp_single* s)
+{
+    u32 val = ((u32)s->sign << 16) +
+              ((u32)s->exponent << VFP_SINGLE_MANTISSA_BITS) +
+              (s->significand >> VFP_SINGLE_LOW_BITS);
+    return (s32)val;
+}
+
+
+u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func);
+
+// Double-precision value in unpacked form (see vfp_double_unpack / vfp_double_pack)
+struct vfp_double {
+    s16	exponent;       // exponent field as extracted by vfp_double_packed_exponent
+    u16	sign;           // 0x8000 when the packed sign bit is set, otherwise 0
+    u64	significand;    // mantissa shifted up so its leading bit sits at bit 62
+};
+
+// VFP_REG_ZERO is a special register number for vfp_get_double
+// which returns (double)0.0.  This is useful for the compare with
+// zero instructions.
+#ifdef CONFIG_VFPv3
+#define VFP_REG_ZERO 32
+#else
+#define VFP_REG_ZERO 16
+#endif
+
+#define VFP_DOUBLE_MANTISSA_BITS (52)
+#define VFP_DOUBLE_EXPONENT_BITS (11)
+#define VFP_DOUBLE_LOW_BITS      (64 - VFP_DOUBLE_MANTISSA_BITS - 2)   /* evaluates to 10 */
+#define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1)      /* evaluates to 0x3ff */
+
+// The bit in an unpacked double which indicates that it is a quiet NaN
+#define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS))
+
+// Operations on packed double-precision numbers
+#define vfp_double_packed_sign(v)     ((v) & (1ULL << 63))
+#define vfp_double_packed_negate(v)   ((v) ^ (1ULL << 63))
+#define vfp_double_packed_abs(v)      ((v) & ~(1ULL << 63))
+#define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
+#define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
+
+// Classifies an unpacked double: VFP_INFINITY, VFP_QNAN or VFP_SNAN when the
+// exponent is 2047; otherwise VFP_NUMBER, OR-ed with VFP_ZERO or VFP_DENORMAL
+// when the exponent is 0.
+inline int vfp_double_type(const vfp_double* s)
+{
+    const u64 sig = s->significand;
+
+    if (s->exponent == 2047) {
+        if (sig == 0)
+            return VFP_INFINITY;
+        return (sig & VFP_DOUBLE_SIGNIFICAND_QNAN) ? VFP_QNAN : VFP_SNAN;
+    }
+
+    if (s->exponent != 0)
+        return VFP_NUMBER;
+
+    return VFP_NUMBER | (sig == 0 ? VFP_ZERO : VFP_DENORMAL);
+}
+
+// Unpack a double-precision float into sign/exponent/significand.  The
+// significand is the mantissa magnitude, with the implicit leading 1 added
+// for normal numbers, aligned to bit 62.  May set FPSCR_IDC in *fpscr.
+inline void vfp_double_unpack(vfp_double* s, s64 val, u32* fpscr)
+{
+    s->sign = vfp_double_packed_sign(val) >> 48;
+    s->exponent = vfp_double_packed_exponent(val);
+
+    const bool is_normal = s->exponent != 0 && s->exponent != 2047;
+    const u64 mantissa = ((u64)val << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
+    s->significand = is_normal ? (mantissa | (1ULL << 62)) : mantissa;
+
+    // Flush-to-zero: a denormal input becomes +0 (on a VFPv2 architecture the
+    // sign of the zero is always positive) and the input-denormal flag is set.
+    const bool flush = (*fpscr & FPSCR_FLUSH_TO_ZERO) != 0;
+    if (flush && (vfp_double_type(s) & VFP_DENORMAL) != 0) {
+        s->sign = 0;
+        s->exponent = 0;
+        s->significand = 0;
+        *fpscr |= FPSCR_IDC;
+    }
+}
+
+// Re-pack a double-precision float. This assumes that the float is
+// already normalised such that the MSB is bit 62, _not_ bit 63.
+inline s64 vfp_double_pack(const vfp_double* s)
+{
+    u64 val = ((u64)s->sign << 48) +
+              ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) +
+              (s->significand >> VFP_DOUBLE_LOW_BITS);
+    return (s64)val;
+}
+
+u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
+
+// A special flag to tell the normalisation code not to normalise.
+#define VFP_NAN_FLAG 0x100
+
+// A bit pattern used to indicate the initial (unset) value of the
+// exception mask, in case nothing handles an instruction.  This
+// doesn't include the NAN flag, which get masked out before
+// we check for an error.
+#define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG)
+
+// Flags describing a vfp instruction's operand/result handling.
+//  OP_SCALAR - This operation always operates in scalar mode
+//  OP_SD     - The instruction exceptionally writes to a single precision result.
+//  OP_DD     - The instruction exceptionally writes to a double precision result.
+//  OP_SM     - The instruction exceptionally reads from a single precision operand.
+enum : u32 {
+    OP_SCALAR = (1 << 0),
+    OP_SD     = (1 << 1),
+    OP_DD     = (1 << 1),   // shares its bit value with OP_SD
+    OP_SM     = (1 << 2)
+};
+
+struct op {
+    u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr);   // operation handler taking destination and operand register numbers
+    u32 flags;   // presumably a combination of the OP_* flags -- confirm against the tables that use it
+};
+
+// Find-last-set: returns the 1-based position of the most significant set
+// bit of x (fls(1) == 1, fls(0x80000000) == 32), or 0 when x is zero.
+inline u32 fls(u32 x)
+{
+    if (x == 0)
+        return 0;
+
+    // Binary search for the top set bit.  Start by assuming it is bit 31
+    // (position 32) and correct the guess downwards.
+    int position = 32;
+
+    // Probe with windows of 16, 8, 4, 2 and finally 1 bit(s).
+    for (int width = 16; width >= 1; width /= 2) {
+        // Mask covering the uppermost `width` bits of a 32-bit word:
+        // 0xffff0000, 0xff000000, 0xf0000000, 0xc0000000, 0x80000000.
+        const u32 window = ~(0xffffffffu >> width);
+
+        // An empty window means the top set bit lies at least `width`
+        // positions lower: slide x up so the next, narrower probe
+        // examines the following bits, and lower the position to match.
+        if ((x & window) == 0) {
+            x <<= width;
+            position -= width;
+        }
+    }
+
+    // position is now the 1-based index of the highest set bit.
+    return position;
+}
+
+u32 vfp_double_multiply(vfp_double* vdd, vfp_double* vdn, vfp_double* vdm, u32 fpscr);
+u32 vfp_double_add(vfp_double* vdd, vfp_double* vdn, vfp_double *vdm, u32 fpscr);
+u32 vfp_double_normaliseround(ARMul_State* state, int dd, vfp_double* vd, u32 fpscr, u32 exceptions, const char* func);
diff --git a/tests/skyeye_interpreter/skyeye_common/vfp/vfpdouble.cpp b/tests/skyeye_interpreter/skyeye_common/vfp/vfpdouble.cpp
new file mode 100644
index 00000000..1c33d418
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/vfp/vfpdouble.cpp
@@ -0,0 +1,1262 @@
+/*
+    vfp/vfpdouble.c - ARM VFPv3 emulation unit - SoftFloat double instruction
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/*
+ * This code is derived in part from :
+ * - Android kernel
+ * - John R. Hauser's softfloat library, which
+ * carries the following notice:
+ *
+ * ===========================================================================
+ * This C source file is part of the SoftFloat IEC/IEEE Floating-point
+ * Arithmetic Package, Release 2.
+ *
+ * Written by John R. Hauser.  This work was made possible in part by the
+ * International Computer Science Institute, located at Suite 600, 1947 Center
+ * Street, Berkeley, California 94704.  Funding was partially provided by the
+ * National Science Foundation under grant MIP-9311980.  The original version
+ * of this code was written as part of a project to build a fixed-point vector
+ * processor in collaboration with the University of California at Berkeley,
+ * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+ * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+ * arithmetic/softfloat.html'.
+ *
+ * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+ * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+ * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+ * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+ * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+ *
+ * Derivative works are acceptable, even for commercial purposes, so long as
+ * (1) they include prominent notice that the work is derivative, and (2) they
+ * include prominent notice akin to these three paragraphs for those parts of
+ * this code that are retained.
+ * ===========================================================================
+ */
+
+#include <algorithm>
+#include "common/logging/log.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfp.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfp_helper.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/asm_vfp.h"
+
+static struct vfp_double vfp_double_default_qnan = {
+    2047,                          // exponent: the value vfp_double_type treats as NaN/infinity
+    0,                             // sign: clear
+    VFP_DOUBLE_SIGNIFICAND_QNAN,   // significand: only the quiet-NaN bit set
+};
+
+// Trace-logs the sign (as 0/1), exponent and significand of *d, prefixed with str.
+static void vfp_double_dump(const char *str, struct vfp_double *d) {
+    LOG_TRACE(Core_ARM11, "VFP: %s: sign=%d exponent=%d significand=%016llx",
+              str, d->sign != 0, d->exponent, d->significand);
+}
+
+// Shifts the significand left by the distance between its top set bit and bit 62,
+// lowering the exponent by (shift - 1) to compensate.
+static void vfp_double_normalise_denormal(struct vfp_double *vd)
+{
+    const u32 upper = (u32)(vd->significand >> 32);
+    const u32 lower = (u32)vd->significand;
+    // With an empty upper word, the distance is measured from the lower word instead.
+    const int bits = upper ? 31 - fls(upper) : 63 - fls(lower);
+    vfp_double_dump("normalise_denormal: in", vd);
+    if (bits != 0) {
+        vd->exponent -= bits - 1;
+        vd->significand <<= bits;
+    }
+    vfp_double_dump("normalise_denormal: out", vd);
+}
+
+u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
+{
+    u64 significand, incr;
+    int exponent, shift, underflow;
+    u32 rmode;
+
+    vfp_double_dump("pack: in", vd);
+
+    /*
+     * Infinities and NaNs are a special case: packed as-is (NaNs only when 'exceptions' is non-zero).
+     */
+    if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
+        goto pack;
+
+    /*
+     * Special-case zero: force a zero exponent and pack directly.
+     */
+    if (vd->significand == 0) {
+        vd->exponent = 0;
+        goto pack;
+    }
+
+    exponent = vd->exponent;
+    significand = vd->significand;
+
+    shift = 32 - fls((u32)(significand >> 32));   // leading zero count of the upper word
+    if (shift == 32)                              // upper word empty: count from the lower word
+        shift = 64 - fls((u32)significand);
+    if (shift) {                                  // normalise so the top set bit is bit 63
+        exponent -= shift;
+        significand <<= shift;
+    }
+
+#if 1
+    vd->exponent = exponent;
+    vd->significand = significand;
+    vfp_double_dump("pack: normalised", vd);
+#endif
+
+    /*
+     * Tiny number? Shift right (keeping a sticky bit) until the exponent is zero.
+     */
+    underflow = exponent < 0;
+    if (underflow) {
+        significand = vfp_shiftright64jamming(significand, -exponent);
+        exponent = 0;
+#if 1
+        vd->exponent = exponent;
+        vd->significand = significand;
+        vfp_double_dump("pack: tiny number", vd);
+#endif
+        if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))   // no discarded bits set: result is exact
+            underflow = 0;
+    }
+
+    /*
+     * Select rounding increment from the rounding mode bits of fpscr.
+     */
+    incr = 0;
+    rmode = fpscr & FPSCR_RMODE_MASK;
+
+    if (rmode == FPSCR_ROUND_NEAREST) {
+        incr = 1ULL << VFP_DOUBLE_LOW_BITS;
+        if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)   // lowest kept bit clear: a tie must not round up
+            incr -= 1;
+    } else if (rmode == FPSCR_ROUND_TOZERO) {
+        incr = 0;
+    } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
+        incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
+
+    LOG_TRACE(Core_ARM11, "VFP: rounding increment = 0x%08llx", incr);
+
+    /*
+     * Is our rounding going to overflow? (detected as unsigned wrap-around of the sum)
+     */
+    if ((significand + incr) < significand) {
+        exponent += 1;
+        significand = (significand >> 1) | (significand & 1);   // keep the shifted-out bit as sticky
+        incr >>= 1;
+#if 1
+        vd->exponent = exponent;
+        vd->significand = significand;
+        vfp_double_dump("pack: overflow", vd);
+#endif
+    }
+
+    /*
+     * If any of the low bits (which will be shifted out of the
+     * number) are non-zero, the result is inexact.
+     */
+    if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
+        exceptions |= FPSCR_IXC;
+
+    /*
+     * Do our rounding.
+     */
+    significand += incr;
+
+    /*
+     * Infinity? Overflow yields infinity, or a large finite value when incr is zero.
+     */
+    if (exponent >= 2046) {
+        exceptions |= FPSCR_OFC | FPSCR_IXC;
+        if (incr == 0) {
+            vd->exponent = 2045;
+            vd->significand = 0x7fffffffffffffffULL;
+        } else {
+            vd->exponent = 2047;		/* infinity */
+            vd->significand = 0;
+        }
+    } else {
+        if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)   // nothing left in the kept bits
+            exponent = 0;
+        if (exponent || significand > 0x8000000000000000ULL)
+            underflow = 0;
+        if (underflow)
+            exceptions |= FPSCR_UFC;
+        vd->exponent = exponent;
+        vd->significand = significand >> 1;   // back to the bit-62 alignment expected by vfp_double_pack
+    }
+
+pack:
+    vfp_double_dump("pack: final", vd);
+    {
+        s64 d = vfp_double_pack(vd);
+        LOG_TRACE(Core_ARM11, "VFP: %s: d(d%d)=%016llx exceptions=%08x", func,
+                 dd, d, exceptions);
+        vfp_put_double(state, d, dd);   // the packed result is always written to register dd
+    }
+    return exceptions;
+}
+
+/*
+ * Propagate the NaN, setting exceptions if it is signalling.
+ * 'n' is always a NaN.  'm' may be a number, NaN or infinity, or a
+ * null pointer when there is no second operand.
+ *
+ * The selected NaN is copied to *vdd.  Returns FPSCR_IOC when either
+ * input is a signalling NaN, and VFP_NAN_FLAG otherwise.
+ * Outside default-NaN mode the selected operand is quietened in place.
+ */
+static u32 vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
+                             struct vfp_double *vdm, u32 fpscr)
+{
+    const int tn = vfp_double_type(vdn);
+    const int tm = vdm ? vfp_double_type(vdm) : 0;
+    const bool any_signalling = (tn == VFP_SNAN) || (tm == VFP_SNAN);
+
+    struct vfp_double *nan;
+    if (fpscr & FPSCR_DEFAULT_NAN) {
+        /*
+         * Default NaN mode - always returns a quiet NaN
+         */
+        nan = &vfp_double_default_qnan;
+    } else {
+        /*
+         * Contemporary mode - select the first signalling
+         * NAN, or if neither are signalling, the first
+         * quiet NAN.
+         */
+        const bool pick_n = (tn == VFP_SNAN) || (tm != VFP_SNAN && tn == VFP_QNAN);
+        nan = pick_n ? vdn : vdm;
+
+        /*
+         * Make the NaN quiet.
+         */
+        nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
+    }
+
+    *vdd = *nan;
+
+    /*
+     * If one was a signalling NAN, raise invalid operation.
+     */
+    return any_signalling ? (u32)FPSCR_IOC : (u32)VFP_NAN_FLAG;
+}
+
+// Extended operations
+
+static u32 vfp_double_fabs(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) {
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    // Register dd receives register dm with bit 63 cleared; 'unused' and fpscr are ignored.
+    const u64 magnitude = vfp_double_packed_abs(vfp_get_double(state, dm));
+    vfp_put_double(state, magnitude, dd);
+    return 0;
+}
+
+// Copies double register dm to dd unchanged and returns 0; 'unused' and fpscr are ignored.
+static u32 vfp_double_fcpy(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) {
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_put_double(state, vfp_get_double(state, dm), dd);
+    return 0;
+}
+
+// Copies double register dm to dd with bit 63 flipped and returns 0; 'unused' and fpscr are ignored.
+static u32 vfp_double_fneg(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) {
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_put_double(state, vfp_double_packed_negate(vfp_get_double(state, dm)), dd);
+    return 0;
+}
+
+static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double vdm, vdd, *vdp;
+    int ret, tm;
+
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+
+    tm = vfp_double_type(&vdm);
+    if (tm & (VFP_NAN|VFP_INFINITY)) {
+        vdp = &vdd;
+
+        if (tm & VFP_NAN)
+            ret = vfp_propagate_nan(vdp, &vdm, nullptr, fpscr);   // single operand: no 'm' NaN to consider
+        else if (vdm.sign == 0) {
+sqrt_copy:                                                        // also reached by goto for a zero input
+            vdp = &vdm;
+            ret = 0;
+        } else {
+sqrt_invalid:                                                     // also reached by goto for a negative input
+            vdp = &vfp_double_default_qnan;
+            ret = FPSCR_IOC;
+        }
+        vfp_put_double(state, vfp_double_pack(vdp), dd);
+        return ret;
+    }
+
+    /*
+     * sqrt(+/- 0) == +/- 0
+     */
+    if (tm & VFP_ZERO)
+        goto sqrt_copy;
+
+    /*
+     * Normalise a denormalised number
+     */
+    if (tm & VFP_DENORMAL)
+        vfp_double_normalise_denormal(&vdm);
+
+    /*
+     * sqrt(<0) = invalid
+     */
+    if (vdm.sign)
+        goto sqrt_invalid;
+
+    vfp_double_dump("sqrt", &vdm);
+
+    /*
+     * Estimate the square root.
+     */
+    vdd.sign = 0;
+    vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;   // halve the exponent after removing the 1023 offset
+    vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
+
+    vfp_double_dump("sqrt estimate1", &vdd);
+
+    vdm.significand >>= 1 + (vdm.exponent & 1);           // one extra shift when the exponent is odd
+    vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
+
+    vfp_double_dump("sqrt estimate2", &vdd);
+
+    /*
+     * And now adjust: only when the low bits of the estimate are <= 5.
+     */
+    if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
+        if (vdd.significand < 2) {
+            vdd.significand = ~0ULL;
+        } else {
+            u64 termh, terml, remh, reml;
+            vdm.significand <<= 2;
+            mul64to128(&termh, &terml, vdd.significand, vdd.significand);   // square of the estimate
+            sub128(&remh, &reml, vdm.significand, 0, termh, terml);         // remainder = operand - estimate^2
+            while ((s64)remh < 0) {                                         // estimate too large: step it down
+                vdd.significand -= 1;
+                shift64left(&termh, &terml, vdd.significand);
+                terml |= 1;
+                add128(&remh, &reml, remh, reml, termh, terml);
+            }
+            vdd.significand |= (remh | reml) != 0;                          // sticky bit for a non-zero remainder
+        }
+    }
+    vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
+
+    return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fsqrt");
+}
+
+/*
+ * Flag bits returned by vfp_compare() for double registers dd and dm:
+ *   Equal := Z|C,   Less than (dd < dm) := N,   Greater than := C,
+ *   Unordered (either operand is a NaN) := C|V.
+ * FPSCR_IOC is added for a signalling NaN, or for any NaN when signal_on_qnan != 0.
+ */
+static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u32 fpscr)
+{
+    s64 d, m; // packed register contents, compared as signed 64-bit integers below
+    u32 ret = 0;
+
+    LOG_TRACE(Core_ARM11, "In %s, state=0x%p, fpscr=0x%x", __FUNCTION__, state, fpscr);
+    m = vfp_get_double(state, dm);
+    if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { // exponent 2047 with a non-zero mantissa: m is a NaN
+        ret |= FPSCR_CFLAG | FPSCR_VFLAG;
+        if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) // top mantissa bit clear => signalling NaN
+            /*
+             * Signalling NaN, or signalling on quiet NaN
+             */
+            ret |= FPSCR_IOC;
+    }
+
+    d = vfp_get_double(state, dd);
+    if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { // same NaN test for d
+        ret |= FPSCR_CFLAG | FPSCR_VFLAG;
+        if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
+            /*
+             * Signalling NaN, or signalling on quiet NaN
+             */
+            ret |= FPSCR_IOC;
+    }
+
+    if (ret == 0) { // neither operand is a NaN: perform the ordered comparison
+        //printf("In %s, d=%lld, m =%lld\n ", __FUNCTION__, d, m);
+        if (d == m || vfp_double_packed_abs(d | m) == 0) { // identical bits, or both operands are zeros (of any sign)
+            /*
+             * equal
+             */
+            ret |= FPSCR_ZFLAG | FPSCR_CFLAG;
+            //printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret);
+        } else if (vfp_double_packed_sign(d ^ m)) {
+            /*
+             * different signs
+             */
+            if (vfp_double_packed_sign(d))
+                /*
+                 * d is negative, so d < m
+                 */
+                ret |= FPSCR_NFLAG;
+            else
+                /*
+                 * d is positive, so d > m
+                 */
+                ret |= FPSCR_CFLAG;
+        } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { // same sign: the XOR inverts the integer comparison when both are negative
+            /*
+             * d < m
+             */
+            ret |= FPSCR_NFLAG;
+        } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
+            /*
+             * d > m
+             */
+            ret |= FPSCR_CFLAG;
+        }
+    }
+    LOG_TRACE(Core_ARM11, "In %s, state=0x%p, ret=0x%x", __FUNCTION__, state, ret);
+
+    return ret; // no register is written; the result is carried entirely by the returned flag bits
+}
+
+static u32 vfp_double_fcmp(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{   // FCMP: compares double registers dd and dm; signal_on_qnan = 0, so only signalling NaNs raise IOC.
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_compare(state, dd, 0, dm, fpscr);
+}
+
+static u32 vfp_double_fcmpe(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{   // FCMPE: compares double registers dd and dm; signal_on_qnan = 1, so any NaN operand raises IOC.
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_compare(state, dd, 1, dm, fpscr);
+}
+
+static u32 vfp_double_fcmpz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{   // FCMPZ: compares double register dd against VFP_REG_ZERO (the dm argument is ignored); signal_on_qnan = 0.
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_compare(state, dd, 0, VFP_REG_ZERO, fpscr);
+}
+
+static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{   // FCMPEZ: compares double register dd against VFP_REG_ZERO (the dm argument is ignored); signal_on_qnan = 1.
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr);
+}
+
+static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{   // FCVT (double -> single): converts double register dm and stores the result in single register sd.
+    struct vfp_double vdm;
+    struct vfp_single vsd;
+    int tm;
+    u32 exceptions = 0;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+
+    tm = vfp_double_type(&vdm);
+
+    /*
+     * If we have a signalling NaN, signal invalid operation.
+     */
+    if (tm == VFP_SNAN)
+        exceptions = FPSCR_IOC;
+
+    if (tm & VFP_DENORMAL)
+        vfp_double_normalise_denormal(&vdm);
+
+    vsd.sign = vdm.sign;
+    vsd.significand = vfp_hi64to32jamming(vdm.significand); // narrow the 64-bit significand to 32 bits via the jamming helper
+
+    /*
+     * If we have an infinity or a NaN, the exponent must be 255
+     */
+    if (tm & (VFP_INFINITY|VFP_NAN)) {
+        vsd.exponent = 255;
+        if (tm == VFP_QNAN)
+            vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; // keep a quiet NaN quiet after narrowing
+        goto pack_nan; // infinities and NaNs are packed directly, without rounding
+    } else if (tm & VFP_ZERO)
+        vsd.exponent = 0;
+    else
+        vsd.exponent = vdm.exponent - (1023 - 127); // re-bias the exponent from double (1023) to single (127)
+
+    return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts");
+
+pack_nan:
+    vfp_put_float(state, vfp_single_pack(&vsd), sd);
+    return exceptions;
+}
+
+static u32 vfp_double_fuito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{   // FUITO: converts the unsigned 32-bit integer held in single register dm to a double in register dd.
+    struct vfp_double vdm;
+    u32 m = vfp_get_float(state, dm); // raw 32-bit register contents, used as an unsigned integer
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vdm.sign = 0;
+    vdm.exponent = 1023 + 63 - 1; // NOTE(review): presumably the exponent that makes the raw integer significand read as its own value once normalised — confirm
+    vdm.significand = (u64)m;
+
+    return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fuito"); // the unnormalised value is handed to the normalise/round helper
+}
+
+static u32 vfp_double_fsito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{   // FSITO: converts the signed 32-bit integer held in single register dm to a double in register dd.
+    struct vfp_double vdm;
+    u32 m = vfp_get_float(state, dm); // raw 32-bit register contents, interpreted as two's complement
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vdm.sign = (m & 0x80000000) >> 16; // moves the integer's sign bit to 0x8000, the sign encoding used by vfp_double
+    vdm.exponent = 1023 + 63 - 1; // same exponent as the unsigned conversion (fuito)
+    vdm.significand = vdm.sign ? (~m + 1) : m; // magnitude: two's-complement negate when negative
+
+    return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fsito");
+}
+
+static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{   // FTOUI: converts double register dm to an unsigned 32-bit integer in single register sd, using the FPSCR rounding mode; returns the exception bits.
+    struct vfp_double vdm;
+    u32 d, exceptions = 0;
+    int rmode = fpscr & FPSCR_RMODE_MASK;
+    int tm;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+
+    /*
+     * Do we have a denormalised number?
+     */
+    tm = vfp_double_type(&vdm);
+    if (tm & VFP_DENORMAL)
+        exceptions |= FPSCR_IDC;
+
+    if (tm & VFP_NAN)
+        vdm.sign = 1; // makes a NaN take the 'negative' arm of the saturation branch below, i.e. convert to 0 with IOC
+
+    if (vdm.exponent >= 1023 + 32) { // magnitude >= 2^32 (also infinities and NaNs): saturate
+        d = vdm.sign ? 0 : 0xffffffff;
+        exceptions = FPSCR_IOC;
+    } else if (vdm.exponent >= 1023 - 1) { // 0.5 <= magnitude < 2^32
+        int shift = 1023 + 63 - vdm.exponent; // ranges from 32 (exponent 1054) to 64 (exponent 1022)
+        u64 rem, incr = 0; // rem: discarded fraction bits, left-aligned; incr: rounding increment
+
+        /*
+         * shift == 64 for inputs in [0.5, 1): shifting a u64 by 64 is undefined, and the integer part is 0 there.
+         */
+        d = (shift < 64) ? (u32)((vdm.significand << 1) >> shift) : 0;
+        rem = vdm.significand << (65 - shift);
+
+        if (rmode == FPSCR_ROUND_NEAREST) {
+            incr = 0x8000000000000000ULL;
+            if ((d & 1) == 0)
+                incr -= 1; // ties go to even: an exact half does not carry when d is already even
+        } else if (rmode == FPSCR_ROUND_TOZERO) {
+            incr = 0;
+        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
+            incr = ~0ULL; // rounding away from zero: any non-zero fraction carries
+        }
+
+        if ((rem + incr) < rem) { // unsigned wrap-around means the fraction rounds up
+            if (d < 0xffffffff)
+                d += 1;
+            else
+                exceptions |= FPSCR_IOC;
+        }
+
+        if (d && vdm.sign) { // a negative value that rounds to non-zero is not representable
+            d = 0;
+            exceptions |= FPSCR_IOC;
+        } else if (rem)
+            exceptions |= FPSCR_IXC;
+    } else { // magnitude < 0.5 (zeros and denormals included)
+        d = 0;
+        if (vdm.exponent | vdm.significand) { // non-zero input: the result is inexact
+            exceptions |= FPSCR_IXC;
+            if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
+                d = 1;
+            else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
+                d = 0;
+                exceptions |= FPSCR_IOC;
+            }
+        }
+    }
+
+    LOG_TRACE(Core_ARM11, "VFP: ftoui: d(s%d)=%08x exceptions=%08x", sd, d, exceptions);
+
+    vfp_put_float(state, d, sd); // the integer result is written as raw bits into single register sd
+
+    return exceptions;
+}
+
+static u32 vfp_double_ftouiz(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{   // FTOUIZ: same as vfp_double_ftoui() but always rounds towards zero; only the rounding-mode field of fpscr is replaced, other bits pass through.
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_double_ftoui(state, sd, unused, dm, (fpscr & ~FPSCR_RMODE_MASK) | FPSCR_ROUND_TOZERO);
+}
+
+static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{   // FTOSI: converts double register dm to a signed 32-bit integer in single register sd, using the FPSCR rounding mode; returns the exception bits.
+    struct vfp_double vdm;
+    u32 d, exceptions = 0; // d holds the magnitude until the final negation
+    int rmode = fpscr & FPSCR_RMODE_MASK;
+    int tm;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+    vfp_double_dump("VDM", &vdm);
+
+    /*
+     * Do we have denormalised number?
+     */
+    tm = vfp_double_type(&vdm);
+    if (tm & VFP_DENORMAL)
+        exceptions |= FPSCR_IDC;
+
+    if (tm & VFP_NAN) { // NaN converts to 0 and signals invalid operation
+        d = 0;
+        exceptions |= FPSCR_IOC;
+    } else if (vdm.exponent >= 1023 + 32) { // magnitude >= 2^32 (also infinities): saturate to INT_MAX / INT_MIN
+        d = 0x7fffffff;
+        if (vdm.sign)
+            d = ~d;
+        exceptions |= FPSCR_IOC;
+    } else if (vdm.exponent >= 1023 - 1) { // 0.5 <= magnitude < 2^32
+        int shift = 1023 + 63 - vdm.exponent; // ranges from 32 (exponent 1054) to 64 (exponent 1022)
+        u64 rem, incr = 0; // rem: discarded fraction bits, left-aligned; incr: rounding increment
+
+        d = (shift < 64) ? (u32)((vdm.significand << 1) >> shift) : 0; // shift == 64 (inputs in [0.5, 1)) would be an undefined u64 shift; the integer part is 0 there
+        rem = vdm.significand << (65 - shift);
+
+        if (rmode == FPSCR_ROUND_NEAREST) {
+            incr = 0x8000000000000000ULL;
+            if ((d & 1) == 0)
+                incr -= 1; // ties go to even: an exact half does not carry when d is already even
+        } else if (rmode == FPSCR_ROUND_TOZERO) {
+            incr = 0;
+        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
+            incr = ~0ULL; // rounding away from zero: any non-zero fraction carries
+        }
+
+        if ((rem + incr) < rem && d < 0xffffffff) // unsigned wrap-around means the fraction rounds up
+            d += 1;
+        if (d > (0x7fffffffU + (vdm.sign != 0))) { // largest magnitude: 0x7fffffff when positive, 0x80000000 when negative
+            d = (0x7fffffffU + (vdm.sign != 0));
+            exceptions |= FPSCR_IOC;
+        } else if (rem)
+            exceptions |= FPSCR_IXC;
+
+        if (vdm.sign)
+            d = (~d + 1); // two's-complement negate the magnitude
+    } else { // magnitude < 0.5 (zeros and denormals included)
+        d = 0;
+        if (vdm.exponent | vdm.significand) { // non-zero input: the result is inexact
+            exceptions |= FPSCR_IXC;
+            if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
+                d = 1;
+            else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
+                d = -1;
+        }
+    }
+
+    LOG_TRACE(Core_ARM11, "VFP: ftosi: d(s%d)=%08x exceptions=%08x", sd, d, exceptions);
+
+    vfp_put_float(state, (s32)d, sd); // the integer result is written as raw bits into single register sd
+
+    return exceptions;
+}
+
+static u32 vfp_double_ftosiz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{   // FTOSIZ: same as vfp_double_ftosi() but always rounds towards zero; only the rounding-mode field of fpscr is replaced. 'dd' is forwarded as the single destination register.
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_double_ftosi(state, dd, unused, dm, (fpscr & ~FPSCR_RMODE_MASK) | FPSCR_ROUND_TOZERO);
+}
+
+static struct op fops_ext[32] = {            // extension-op dispatch table indexed by FEXT_TO_IDX(inst); sized 32 so indices 0x1C-0x1F read {nullptr, 0} instead of past the end (NOTE(review): assumes a 5-bit index — confirm against FEXT_TO_IDX)
+    { vfp_double_fcpy,   0 },                 //0x00000000 - FEXT_FCPY
+    { vfp_double_fabs,   0 },                 //0x00000001 - FEXT_FABS
+    { vfp_double_fneg,   0 },                 //0x00000002 - FEXT_FNEG
+    { vfp_double_fsqrt,  0 },                 //0x00000003 - FEXT_FSQRT
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { vfp_double_fcmp,   OP_SCALAR },         //0x00000008 - FEXT_FCMP
+    { vfp_double_fcmpe,  OP_SCALAR },         //0x00000009 - FEXT_FCMPE
+    { vfp_double_fcmpz,  OP_SCALAR },         //0x0000000A - FEXT_FCMPZ
+    { vfp_double_fcmpez, OP_SCALAR },         //0x0000000B - FEXT_FCMPEZ
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { vfp_double_fcvts,  OP_SCALAR|OP_SD },   //0x0000000F - FEXT_FCVT (writes a single register; vfp_double_cpdo tests OP_SD to decode the destination)
+    { vfp_double_fuito,  OP_SCALAR|OP_SM },   //0x00000010 - FEXT_FUITO
+    { vfp_double_fsito,  OP_SCALAR|OP_SM },   //0x00000011 - FEXT_FSITO
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { vfp_double_ftoui,  OP_SCALAR|OP_SD },   //0x00000018 - FEXT_FTOUI
+    { vfp_double_ftouiz, OP_SCALAR|OP_SD },   //0x00000019 - FEXT_FTOUIZ
+    { vfp_double_ftosi,  OP_SCALAR|OP_SD },   //0x0000001A - FEXT_FTOSI
+    { vfp_double_ftosiz, OP_SCALAR|OP_SD },   //0x0000001B - FEXT_FTOSIZ
+};
+
+static u32 // exception bits for the special case; *vdd receives the result
+vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
+                          struct vfp_double *vdm, u32 fpscr)
+{   // Addition when 'vdn' is an infinity or a NaN (vfp_double_add calls this when vdn->exponent == 2047).
+    struct vfp_double *vdp; // points at whichever value becomes the result
+    u32 exceptions = 0;
+    int tn, tm;
+
+    tn = vfp_double_type(vdn);
+    tm = vfp_double_type(vdm);
+
+    if (tn & tm & VFP_INFINITY) {
+        /*
+         * Two infinities.  Are they different signs?
+         */
+        if (vdn->sign ^ vdm->sign) {
+            /*
+             * different signs -> invalid
+             */
+            exceptions = FPSCR_IOC;
+            vdp = &vfp_double_default_qnan;
+        } else {
+            /*
+             * same signs -> valid
+             */
+            vdp = vdn;
+        }
+    } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
+        /*
+         * One infinity and one number -> infinity
+         */
+        vdp = vdn;
+    } else {
+        /*
+         * At least one operand is a NaN; the NaN helper fills in *vdd
+         */
+        return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
+    }
+    *vdd = *vdp;
+    return exceptions;
+}
+
+u32 vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,struct vfp_double *vdm, u32 fpscr)
+{   // Adds two unpacked doubles into *vdd (not yet normalised or rounded) and returns exception bits.
+    u32 exp_diff;
+    u64 m_sig; // aligned significand of the smaller operand, then the raw sum/difference
+
+    if (vdn->significand & (1ULL << 63) ||
+            vdm->significand & (1ULL << 63)) { // bit 63 set is reported as malformed input; processing continues regardless
+        LOG_INFO(Core_ARM11, "VFP: bad FP values in %s", __func__);
+        vfp_double_dump("VDN", vdn);
+        vfp_double_dump("VDM", vdm);
+    }
+
+    /*
+     * Ensure that 'n' is the largest magnitude number.  Note that
+     * if 'n' and 'm' have equal exponents, we do not swap them.
+     * This ensures that NaN propagation works correctly.
+     */
+    if (vdn->exponent < vdm->exponent) {
+        std::swap(vdm, vdn); // swaps the local pointers only; the caller's structures are untouched
+    }
+
+    /*
+     * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
+     * infinity or a NaN here.
+     */
+    if (vdn->exponent == 2047)
+        return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
+
+    /*
+     * We have two proper numbers, where 'vdn' is the larger magnitude.
+     *
+     * Copy 'n' to 'd' before doing the arithmetic.
+     */
+    *vdd = *vdn;
+
+    /*
+     * Align 'm' with the result.
+     */
+    exp_diff = vdn->exponent - vdm->exponent;
+    m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
+
+    /*
+     * If the signs are different, we are really subtracting.
+     */
+    if (vdn->sign ^ vdm->sign) {
+        m_sig = vdn->significand - m_sig;
+        if ((s64)m_sig < 0) { // 'm' was larger after alignment: flip the result sign and take the magnitude
+            vdd->sign = vfp_sign_negate(vdd->sign);
+            m_sig = (~m_sig + 1);
+        } else if (m_sig == 0) { // exact cancellation: the zero is negative only when rounding towards minus infinity
+            vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
+                        FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
+        }
+    } else {
+        m_sig += vdn->significand;
+    }
+    vdd->significand = m_sig;
+
+    return 0; // the numeric path raises no exception bits itself
+}
+
+u32 // exception bits; *vdd receives the product (not yet normalised or rounded)
+vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
+                    struct vfp_double *vdm, u32 fpscr)
+{   // Multiplies two unpacked doubles, handling NaN, infinity and zero operands explicitly.
+    vfp_double_dump("VDN", vdn);
+    vfp_double_dump("VDM", vdm);
+
+    /*
+     * Ensure that 'n' is the largest magnitude number.  Note that
+     * if 'n' and 'm' have equal exponents, we do not swap them.
+     * This ensures that NaN propagation works correctly.
+     */
+    if (vdn->exponent < vdm->exponent) {
+        std::swap(vdm, vdn); // swaps the local pointers only; the caller's structures are untouched
+        LOG_TRACE(Core_ARM11, "VFP: swapping M <-> N");
+    }
+
+    vdd->sign = vdn->sign ^ vdm->sign; // product sign is the XOR of the operand signs
+
+    /*
+     * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
+     */
+    if (vdn->exponent == 2047) {
+        if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) // either operand is a NaN
+            return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
+        if ((vdm->exponent | vdm->significand) == 0) { // infinity * zero is invalid
+            *vdd = vfp_double_default_qnan;
+            return FPSCR_IOC;
+        }
+        vdd->exponent = vdn->exponent; // infinity * non-zero = infinity with the sign computed above
+        vdd->significand = 0;
+        return 0;
+    }
+
+    /*
+     * If 'm' is zero, the result is always zero.  In this case,
+     * 'n' may be zero or a number, but it doesn't matter which.
+     */
+    if ((vdm->exponent | vdm->significand) == 0) {
+        vdd->exponent = 0;
+        vdd->significand = 0;
+        return 0;
+    }
+
+    /*
+     * We add 2 to the destination exponent for the same reason
+     * as the addition case - though this time we have +1 from
+     * each input operand.
+     */
+    vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
+    vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); // product significand comes from the high-64 multiply helper
+
+    vfp_double_dump("VDD", vdd);
+    return 0;
+}
+
+#define NEG_MULTIPLY	(1 << 0) // 'negate' flag: negate the product before accumulating
+#define NEG_SUBTRACT	(1 << 1) // 'negate' flag: negate the accumulator (register dd) before adding
+
+static u32 // exception bits from multiply, add and the final normalise/round
+vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 fpscr, u32 negate, const char *func)
+{   // Shared body of fmac/fnmac/fmsc/fnmsc: dd = (+/-)dd + (+/-)(dn * dm), with the signs selected by 'negate'.
+    struct vfp_double vdd, vdp, vdn, vdm; // vdp holds the product; vdn is reused below for the accumulator
+    u32 exceptions;
+
+    vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
+    if (vdn.exponent == 0 && vdn.significand) // denormal operand: normalise before use
+        vfp_double_normalise_denormal(&vdn);
+
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+    if (vdm.exponent == 0 && vdm.significand)
+        vfp_double_normalise_denormal(&vdm);
+
+    exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
+    if (negate & NEG_MULTIPLY)
+        vdp.sign = vfp_sign_negate(vdp.sign);
+
+    vfp_double_unpack(&vdn, vfp_get_double(state, dd), &fpscr); // vdn now holds the accumulator, read from the destination register
+    if (vdn.exponent == 0 && vdn.significand != 0)
+        vfp_double_normalise_denormal(&vdn);
+
+    if (negate & NEG_SUBTRACT)
+        vdn.sign = vfp_sign_negate(vdn.sign);
+
+    exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); // the product is added without an intermediate normalise/round call
+
+    return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, func);
+}
+
+/*
+ * Standard operations
+ */
+
+/*
+ * dd = dd + (dn * dm)
+ */
+static u32 vfp_double_fmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FMAC: multiply-accumulate with no negation flags.
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, 0, "fmac");
+}
+
+/*
+ * dd = dd - (dn * dm)
+ */
+static u32 vfp_double_fnmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FNMAC: multiply-accumulate with the product negated (NEG_MULTIPLY).
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
+}
+
+/*
+ * dd = -dd + (dn * dm)
+ */
+static u32 vfp_double_fmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FMSC: multiply-accumulate with the accumulator negated (NEG_SUBTRACT).
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
+}
+
+/*
+ * dd = -dd - (dn * dm)
+ */
+static u32 vfp_double_fnmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FNMSC: multiply-accumulate with both the accumulator and the product negated.
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
+}
+
+/*
+ * dd = dn * dm
+ */
+static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FMUL: multiplies double registers dn and dm into register dd; returns the exception bits.
+    struct vfp_double vdd, vdn, vdm;
+    u32 exceptions;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
+    if (vdn.exponent == 0 && vdn.significand) // denormal operand: normalise before use
+        vfp_double_normalise_denormal(&vdn);
+
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+    if (vdm.exponent == 0 && vdm.significand)
+        vfp_double_normalise_denormal(&vdm);
+
+    exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
+    return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fmul"); // rounds and writes register dd
+}
+
+/*
+ * dd = -(dn * dm)
+ */
+static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FNMUL: multiplies double registers dn and dm, negates the product and stores it in register dd.
+    struct vfp_double vdd, vdn, vdm;
+    u32 exceptions;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
+    if (vdn.exponent == 0 && vdn.significand) // denormal operand: normalise before use
+        vfp_double_normalise_denormal(&vdn);
+
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+    if (vdm.exponent == 0 && vdm.significand)
+        vfp_double_normalise_denormal(&vdm);
+
+    exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
+    vdd.sign = vfp_sign_negate(vdd.sign); // negation is applied to the unrounded product
+
+    return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fnmul");
+}
+
+/*
+ * dd = dn + dm
+ */
+static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FADD: adds double registers dn and dm into register dd; returns the exception bits.
+    struct vfp_double vdd, vdn, vdm;
+    u32 exceptions;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
+    if (vdn.exponent == 0 && vdn.significand) // denormal operand: normalise before use
+        vfp_double_normalise_denormal(&vdn);
+
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+    if (vdm.exponent == 0 && vdm.significand)
+        vfp_double_normalise_denormal(&vdm);
+
+    exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
+
+    return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fadd"); // rounds and writes register dd
+}
+
+/*
+ * dd = dn - dm
+ */
+static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FSUB: subtracts double register dm from dn into register dd; returns the exception bits.
+    struct vfp_double vdd, vdn, vdm;
+    u32 exceptions;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
+    if (vdn.exponent == 0 && vdn.significand) // denormal operand: normalise before use
+        vfp_double_normalise_denormal(&vdn);
+
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+    if (vdm.exponent == 0 && vdm.significand)
+        vfp_double_normalise_denormal(&vdm);
+
+    /*
+     * Subtraction is like addition, but with a negated operand.
+     */
+    vdm.sign = vfp_sign_negate(vdm.sign);
+
+    exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
+
+    return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fsub"); // rounds and writes register dd
+}
+
+/*
+ * dd = dn / dm
+ */
+static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{   // FDIV: divides double register dn by dm into register dd; returns the exception bits.
+    struct vfp_double vdd, vdn, vdm;
+    u32 exceptions = 0;
+    int tm, tn;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
+    vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
+
+    vdd.sign = vdn.sign ^ vdm.sign; // quotient sign is the XOR of the operand signs
+
+    tn = vfp_double_type(&vdn);
+    tm = vfp_double_type(&vdm);
+
+    /*
+     * Is n a NAN?
+     */
+    if (tn & VFP_NAN)
+        goto vdn_nan;
+
+    /*
+     * Is m a NAN?
+     */
+    if (tm & VFP_NAN)
+        goto vdm_nan;
+
+    /*
+     * If n and m are infinity, the result is invalid
+     * If n and m are zero, the result is invalid
+     */
+    if (tm & tn & (VFP_INFINITY|VFP_ZERO))
+        goto invalid;
+
+    /*
+     * If n is infinity, the result is infinity
+     */
+    if (tn & VFP_INFINITY)
+        goto infinity;
+
+    /*
+     * If m is zero, raise div0 exceptions
+     */
+    if (tm & VFP_ZERO)
+        goto divzero;
+
+    /*
+     * If m is infinity, or n is zero, the result is zero
+     */
+    if (tm & VFP_INFINITY || tn & VFP_ZERO)
+        goto zero;
+
+    if (tn & VFP_DENORMAL)
+        vfp_double_normalise_denormal(&vdn);
+    if (tm & VFP_DENORMAL)
+        vfp_double_normalise_denormal(&vdm);
+
+    /*
+     * Ok, we have two numbers, we can perform division.
+     */
+    vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
+    vdm.significand <<= 1;
+    if (vdm.significand <= (2 * vdn.significand)) { // pre-scale the dividend and bump the exponent to compensate
+        vdn.significand >>= 1;
+        vdd.exponent++;
+    }
+    vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); // quotient estimate
+    if ((vdd.significand & 0x1ff) <= 2) { // the exact correction runs only when the low 9 bits of the estimate are <= 2
+        u64 termh, terml, remh, reml; // 128-bit product (term) and remainder (rem), as high/low halves
+        mul64to128(&termh, &terml, vdm.significand, vdd.significand);
+        sub128(&remh, &reml, vdn.significand, 0, termh, terml); // rem = dividend - divisor * estimate
+        while ((s64)remh < 0) { // estimate too large: step it down until the remainder is non-negative
+            vdd.significand -= 1;
+            add128(&remh, &reml, remh, reml, 0, vdm.significand);
+        }
+        vdd.significand |= (reml != 0); // sticky bit: set bit 0 when the low remainder word is non-zero
+    }
+    return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fdiv");
+
+vdn_nan:
+    exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
+pack: // common exit for special results: pack vdd directly, without rounding
+    vfp_put_double(state, vfp_double_pack(&vdd), dd);
+    return exceptions;
+
+vdm_nan:
+    exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); // operands passed in swapped order so the NaN 'm' comes first
+    goto pack;
+
+zero:
+    vdd.exponent = 0;
+    vdd.significand = 0;
+    goto pack;
+
+divzero:
+    exceptions = FPSCR_DZC; // falls through: the result is a signed infinity
+infinity:
+    vdd.exponent = 2047;
+    vdd.significand = 0;
+    goto pack;
+
+invalid:
+    vfp_put_double(state, vfp_double_pack(&vfp_double_default_qnan), dd);
+    return FPSCR_IOC;
+}
+
+static struct op fops[16] = {  // standard-op dispatch table indexed by FOP_TO_IDX(op); sized 16 so unlisted opcodes read {nullptr, 0} instead of past the end (NOTE(review): assumes a 4-bit index — confirm against FOP_TO_IDX)
+    { vfp_double_fmac,  0 },
+    { vfp_double_fmsc,  0 },
+    { vfp_double_fmul,  0 },
+    { vfp_double_fadd,  0 },
+    { vfp_double_fnmac, 0 },
+    { vfp_double_fnmsc, 0 },
+    { vfp_double_fnmul, 0 },
+    { vfp_double_fsub,  0 },
+    { vfp_double_fdiv,  0 },
+};
+
+#define FREG_BANK(x)	((x) & 0x0c) // bits 3:2 of a register number: the bank
+#define FREG_IDX(x)	((x) & 3) // bits 1:0 of a register number: the index within the bank
+
+u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
+{   // Decodes one double-precision data-processing instruction, runs its handler once (or once per vector element) and returns the OR of the handlers' exception bits; returns ~0 when no handler exists.
+    u32 op = inst & FOP_MASK;
+    u32 exceptions = 0;
+    unsigned int dest;
+    unsigned int dn = vfp_get_dn(inst);
+    unsigned int dm;
+    unsigned int vecitr, veclen, vecstride; // veclen and vecitr stay in FPSCR length-field position (not shifted down)
+    struct op *fop;
+
+    LOG_TRACE(Core_ARM11, "In %s", __FUNCTION__);
+    vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)); // 2 when all stride bits are set, otherwise 1
+
+    fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)]; // extension ops use their own table
+
+    /*
+     * fcvtds takes an sN register number as destination, not dN.
+     * It also always operates on scalars.
+     */
+    if (fop->flags & OP_SD)
+        dest = vfp_get_sd(inst);
+    else
+        dest = vfp_get_dd(inst);
+
+    /*
+     * f[us]ito takes a sN operand, not a dN operand.
+     */
+    if (fop->flags & OP_SM)
+        dm = vfp_get_sm(inst);
+    else
+        dm = vfp_get_dm(inst);
+
+    /*
+     * If destination bank is zero, vector length is always '1'.
+     * ARM DDI0100F C5.1.3, C5.3.2.
+     */
+    if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
+        veclen = 0;
+    else
+        veclen = fpscr & FPSCR_LENGTH_MASK;
+
+    LOG_TRACE(Core_ARM11, "VFP: vecstride=%u veclen=%u", vecstride,
+             (veclen >> FPSCR_LENGTH_BIT) + 1);
+
+    if (!fop->fn) { // empty table slot: report the index of the table that was actually consulted
+        printf("VFP: could not find double op %u\n", (unsigned)((op == FOP_EXT) ? FEXT_TO_IDX(inst) : FOP_TO_IDX(op)));
+        goto invalid;
+    }
+
+    for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { // one pass per vector element; exactly one pass when veclen == 0
+        u32 except;
+        char type;
+
+        type = (fop->flags & OP_SD) ? 's' : 'd';
+        (void)type; // only used by the trace output below
+
+        if (op == FOP_EXT)
+            LOG_TRACE(Core_ARM11, "VFP: itr%d (%c%u) = op[%u] (d%u)",
+                     vecitr >> FPSCR_LENGTH_BIT,
+                     type, dest, dn, dm);
+        else
+            LOG_TRACE(Core_ARM11, "VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)",
+                     vecitr >> FPSCR_LENGTH_BIT,
+                     type, dest, dn, FOP_TO_IDX(op), dm);
+
+        except = fop->fn(state, dest, dn, dm, fpscr);
+        LOG_TRACE(Core_ARM11, "VFP: itr%d: exceptions=%08x",
+                 vecitr >> FPSCR_LENGTH_BIT, except);
+
+        exceptions |= except;
+
+        /*
+         * CHECK: It appears to be undefined whether we stop when
+         * we encounter an exception.  We continue.
+         */
+        dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3); // step within the bank, wrapping modulo 4
+        dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
+        if (FREG_BANK(dm) != 0) // a bank-0 dm is not advanced, so the same register is reused every iteration
+            dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
+    }
+    return exceptions;
+
+invalid:
+    return ~0;
+}
diff --git a/tests/skyeye_interpreter/skyeye_common/vfp/vfpinstr.cpp b/tests/skyeye_interpreter/skyeye_common/vfp/vfpinstr.cpp
new file mode 100644
index 00000000..4f908351
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/vfp/vfpinstr.cpp
@@ -0,0 +1,1788 @@
+// Copyright 2012 Michael Kang, 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/* Notice: this file should not be compiled as is, and is meant to be
+   included in other files only. */
+
+/* ----------------------------------------------------------------------- */
+/* CDP instructions */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+
+/* ----------------------------------------------------------------------- */
+/* VMLA */
+/* cond 1110 0D00 Vn-- Vd-- 101X N0M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmla_inst { // payload kept in arm_inst::component for a decoded VMLA
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmla)(unsigned int inst, int index) // translate step: builds the VMLA record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmla_inst)); // header and payload in one allocation
+    vmla_inst *inst_cream = (vmla_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMLA_INST: // execute step for VMLA
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vmla_inst *inst_cream = (vmla_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vmla_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMLS */
+/* cond 1110 0D00 Vn-- Vd-- 101X N1M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmls_inst { // payload kept in arm_inst::component for a decoded VMLS
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmls)(unsigned int inst, int index) // translate step: builds the VMLS record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmls_inst)); // header and payload in one allocation
+    vmls_inst *inst_cream = (vmls_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMLS_INST: // execute step for VMLS
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vmls_inst *inst_cream = (vmls_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vmls_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VNMLA */
+/* cond 1110 0D01 Vn-- Vd-- 101X N1M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vnmla_inst { // payload kept in arm_inst::component for a decoded VNMLA
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vnmla)(unsigned int inst, int index) // translate step: builds the VNMLA record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmla_inst)); // header and payload in one allocation
+    vnmla_inst *inst_cream = (vnmla_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VNMLA_INST: // execute step for VNMLA
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vnmla_inst *inst_cream = (vnmla_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vnmla_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VNMLS */
+/* cond 1110 0D01 Vn-- Vd-- 101X N0M0 Vm-- */
+
+#ifdef VFP_INTERPRETER_STRUCT
+struct vnmls_inst { // payload kept in arm_inst::component for a decoded VNMLS
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vnmls)(unsigned int inst, int index) // translate step: builds the VNMLS record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmls_inst)); // header and payload in one allocation
+    vnmls_inst *inst_cream = (vnmls_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VNMLS_INST: // execute step for VNMLS
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vnmls_inst *inst_cream = (vnmls_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vnmls_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VNMUL */
+/* cond 1110 0D10 Vn-- Vd-- 101X N1M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vnmul_inst { // payload kept in arm_inst::component for a decoded VNMUL
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vnmul)(unsigned int inst, int index) // translate step: builds the VNMUL record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmul_inst)); // header and payload in one allocation
+    vnmul_inst *inst_cream = (vnmul_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VNMUL_INST: // execute step for VNMUL
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vnmul_inst *inst_cream = (vnmul_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vnmul_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMUL */
+/* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmul_inst { // payload kept in arm_inst::component for a decoded VMUL
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmul)(unsigned int inst, int index) // translate step: builds the VMUL record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmul_inst)); // header and payload in one allocation
+    vmul_inst *inst_cream = (vmul_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMUL_INST: // execute step for VMUL
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vmul_inst *inst_cream = (vmul_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vmul_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VADD */
+/* cond 1110 0D11 Vn-- Vd-- 101X N0M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vadd_inst { // payload kept in arm_inst::component for a decoded VADD
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vadd)(unsigned int inst, int index) // translate step: builds the VADD record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vadd_inst)); // header and payload in one allocation
+    vadd_inst *inst_cream = (vadd_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VADD_INST: // execute step for VADD
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vadd_inst *inst_cream = (vadd_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vadd_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VSUB */
+/* cond 1110 0D11 Vn-- Vd-- 101X N1M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vsub_inst { // payload kept in arm_inst::component for a decoded VSUB
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vsub)(unsigned int inst, int index) // translate step: builds the VSUB record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vsub_inst)); // header and payload in one allocation
+    vsub_inst *inst_cream = (vsub_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VSUB_INST: // execute step for VSUB
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vsub_inst *inst_cream = (vsub_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vsub_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VDIV */
+/* cond 1110 1D00 Vn-- Vd-- 101X N0M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vdiv_inst { // payload kept in arm_inst::component for a decoded VDIV
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vdiv)(unsigned int inst, int index) // translate step: builds the VDIV record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vdiv_inst)); // header and payload in one allocation
+    vdiv_inst *inst_cream = (vdiv_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VDIV_INST: // execute step for VDIV
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vdiv_inst *inst_cream = (vdiv_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vdiv_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMOVI move immediate */
+/* cond 1110 1D11 im4H Vd-- 101X 0000 im4L */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmovi_inst { // payload kept in arm_inst::component for a decoded VMOV (immediate)
+    unsigned int single; // 1 when bit 8 of the word is clear
+    unsigned int d; // destination register number assembled from Vd and D
+    unsigned int imm; // 32-bit pattern expanded from imm8 at translate time
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovi)(unsigned int inst, int index) // translate step: builds the VMOVI record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovi_inst)); // header and payload in one allocation
+    vmovi_inst *inst_cream = (vmovi_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->single = BIT(inst, 8) == 0;
+    inst_cream->d      = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4); // single: Vd:D, otherwise D:Vd
+    unsigned int imm8 = BITS(inst, 16, 19) << 4 | BITS(inst, 0, 3); // imm4H (bits 16-19) above imm4L (bits 0-3)
+    if (inst_cream->single)
+        inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0x1f : 0)<<25 | BITS(imm8, 0, 5)<<19; // imm8[7] -> bit 31, NOT imm8[6] -> bit 30, imm8[6] copied into bits 25-29, imm8[5:0] -> bits 19-24
+    else
+        inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0xff : 0)<<22 | BITS(imm8, 0, 5)<<16; // same layout with imm8[6] copied into bits 22-29 and imm8[5:0] in bits 16-21; presumably the upper word of the double - confirm against VMOVI
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMOVI_INST: // execute step for VMOVI
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vmovi_inst *inst_cream = (vmovi_inst *)inst_base->component;
+
+        VMOVI(cpu, inst_cream->single, inst_cream->d, inst_cream->imm); // helper defined elsewhere; gets the pre-decoded fields
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vmovi_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMOVR move register */
+/* cond 1110 1D11 0000 Vd-- 101X 01M0 Vm-- */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmovr_inst { // payload kept in arm_inst::component for a decoded VMOV (register)
+    unsigned int single; // 1 when bit 8 of the word is clear
+    unsigned int d; // destination register number assembled from Vd and D
+    unsigned int m; // source register number assembled from Vm and M
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovr)(unsigned int inst, int index) // translate step: builds the VMOVR record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovr_inst)); // header and payload in one allocation
+    vmovr_inst *inst_cream = (vmovr_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->single = BIT(inst, 8) == 0;
+    inst_cream->d      = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4); // single: Vd:D, otherwise D:Vd
+    inst_cream->m      = (inst_cream->single ? BITS(inst, 0, 3)<<1 | BIT(inst, 5) : BITS(inst, 0, 3) | BIT(inst, 5)<<4); // single: Vm:M, otherwise M:Vm
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMOVR_INST: // execute step for VMOVR
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vmovr_inst *inst_cream = (vmovr_inst *)inst_base->component;
+
+        VMOVR(cpu, inst_cream->single, inst_cream->d, inst_cream->m); // helper defined elsewhere; gets the pre-decoded fields
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vmovr_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VABS */
+/* cond 1110 1D11 0000 Vd-- 101X 11M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vabs_inst { // payload kept in arm_inst::component for a decoded VABS; plain struct like the sibling *_inst records
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vabs)(unsigned int inst, int index) // translate step: builds the VABS record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vabs_inst)); // header and payload in one allocation
+    vabs_inst *inst_cream = (vabs_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8); // nonzero selects vfp_double_cpdo at execution
+    inst_cream->instr = inst; // raw word, handed unchanged to the cpdo routine
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VABS_INST: // execute step for VABS
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vabs_inst *inst_cream = (vabs_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vabs_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VNEG */
+/* cond 1110 1D11 0001 Vd-- 101X 01M0 Vm-- */
+
+#ifdef VFP_INTERPRETER_STRUCT
+struct vneg_inst { // payload kept in arm_inst::component for a decoded VNEG
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vneg)(unsigned int inst, int index) // translate step: builds the VNEG record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vneg_inst)); // header and payload in one allocation
+    vneg_inst *inst_cream = (vneg_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VNEG_INST: // execute step for VNEG
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vneg_inst *inst_cream = (vneg_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vneg_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VSQRT */
+/* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vsqrt_inst { // payload kept in arm_inst::component for a decoded VSQRT
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vsqrt)(unsigned int inst, int index) // translate step: builds the VSQRT record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vsqrt_inst)); // header and payload in one allocation
+    vsqrt_inst *inst_cream = (vsqrt_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VSQRT_INST: // execute step for VSQRT
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vsqrt_inst *inst_cream = (vsqrt_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vsqrt_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VCMP VCMPE */
+/* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 1 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vcmp_inst { // payload kept in arm_inst::component for a decoded VCMP/VCMPE (encoding 1)
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vcmp)(unsigned int inst, int index) // translate step: builds the VCMP record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcmp_inst)); // header and payload in one allocation
+    vcmp_inst *inst_cream = (vcmp_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VCMP_INST: // execute step for VCMP
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vcmp_inst *inst_cream = (vcmp_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vcmp_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VCMP VCMPE (compare with zero) */
+/* cond 1110 1D11 0101 Vd-- 101X E100 0000 Encoding 2 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vcmp2_inst { // payload kept in arm_inst::component for a decoded VCMP/VCMPE (encoding 2)
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vcmp2)(unsigned int inst, int index) // translate step: builds the VCMP2 record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcmp2_inst)); // header and payload in one allocation
+    vcmp2_inst *inst_cream = (vcmp2_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VCMP2_INST: // execute step for VCMP2
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vcmp2_inst *inst_cream = (vcmp2_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vcmp2_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBDS between double and single */
+/* cond 1110 1D11 0111 Vd-- 101X 11M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vcvtbds_inst { // payload kept in arm_inst::component for a decoded VCVTBDS
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vcvtbds)(unsigned int inst, int index) // translate step: builds the VCVTBDS record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbds_inst)); // header and payload in one allocation
+    vcvtbds_inst *inst_cream = (vcvtbds_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VCVTBDS_INST: // execute step for VCVTBDS
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vcvtbds_inst *inst_cream = (vcvtbds_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vcvtbds_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBFF between floating point and fixed point */
+/* cond 1110 1D11 1o1U Vd-- 101X x1i0 imm4 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vcvtbff_inst { // payload kept in arm_inst::component for a decoded VCVTBFF
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vcvtbff)(unsigned int inst, int index) // translate step: builds the VCVTBFF record
+{
+    VFP_DEBUG_UNTESTED(VCVTBFF); // macro defined elsewhere; by its name it flags this path as untested - confirm
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbff_inst)); // header and payload in one allocation
+    vcvtbff_inst *inst_cream = (vcvtbff_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VCVTBFF_INST: // execute step for VCVTBFF
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vcvtbff_inst *inst_cream = (vcvtbff_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vcvtbff_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBFI between floating point and integer */
+/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vcvtbfi_inst { // payload kept in arm_inst::component for a decoded VCVTBFI
+    unsigned int instr; // raw instruction word, handed unchanged to the cpdo routine
+    unsigned int dp_operation; // bit 8 of the word; nonzero selects vfp_double_cpdo
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vcvtbfi)(unsigned int inst, int index) // translate step: builds the VCVTBFI record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbfi_inst)); // header and payload in one allocation
+    vcvtbfi_inst *inst_cream = (vcvtbfi_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VCVTBFI_INST: // execute step for VCVTBFI
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vcvtbfi_inst *inst_cream = (vcvtbfi_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_FPSCR]);
+
+        CHECK_VFP_CDP_RET; // macro defined elsewhere; presumably acts on ret - confirm
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vcvtbfi_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* MRC / MCR instructions */
+/* cond 1110 AAAL XXXX XXXX 101C XBB1 XXXX */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- */
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRS between register and single precision */
+/* cond 1110 000o Vn-- Rt-- 1010 N001 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmovbrs_inst { // payload kept in arm_inst::component for a decoded VMOVBRS
+    unsigned int to_arm; // 1 when bit 20 of the word is set
+    unsigned int t; // ARM core register number (Rt field)
+    unsigned int n; // VFP register number assembled from Vn and N
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrs)(unsigned int inst, int index) // translate step: builds the VMOVBRS record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrs_inst)); // header and payload in one allocation
+    vmovbrs_inst *inst_cream = (vmovbrs_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->to_arm = BIT(inst, 20) == 1; // direction flag; presumably set means VFP-to-ARM, as the name suggests - confirm against VMOVBRS
+    inst_cream->t      = BITS(inst, 12, 15);
+    inst_cream->n      = BIT(inst, 7) | BITS(inst, 16, 19)<<1; // Vn:N - bits 16-19 above bit 7
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMOVBRS_INST: // execute step for VMOVBRS
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        CHECK_VFP_ENABLED;
+
+        vmovbrs_inst *inst_cream = (vmovbrs_inst *)inst_base->component;
+
+        VMOVBRS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->n, &(cpu->Reg[inst_cream->t])); // helper defined elsewhere; gets both the index t and a pointer to Reg[t]
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vmovbrs_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMSR */
+/* cond 1110 1110 reg- Rt-- 1010 0001 0000 */
+/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmsr_inst { // payload kept in arm_inst::component for a decoded VMSR
+    unsigned int reg; // VFP system register selector, bits 16-19 of the word
+    unsigned int Rt; // ARM core register number, bits 12-15 of the word
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmsr)(unsigned int inst, int index) // translate step: builds the VMSR record
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmsr_inst)); // header and payload in one allocation
+    vmsr_inst *inst_cream = (vmsr_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); // condition field
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->reg = BITS(inst, 16, 19);
+    inst_cream->Rt  = BITS(inst, 12, 15);
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMSR_INST: // execute step for VMSR: copies an ARM register into a VFP system register
+{
+    if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) { // operation is skipped when the condition fails
+        /* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
+           and in privileged mode */
+        /* Exceptions must be checked, according to v7 ref manual */
+        CHECK_VFP_ENABLED;
+
+        vmsr_inst* const inst_cream = (vmsr_inst*)inst_base->component;
+
+        unsigned int reg = inst_cream->reg;
+        unsigned int rt  = inst_cream->Rt;
+
+        if (reg == 1) // FPSCR: written with no privilege check on this path
+        {
+            cpu->VFP[VFP_FPSCR] = cpu->Reg[rt];
+        }
+        else if (cpu->InAPrivilegedMode()) // every other selector is honoured only in a privileged mode
+        {
+            if (reg == 8)
+                cpu->VFP[VFP_FPEXC] = cpu->Reg[rt];
+            else if (reg == 9)
+                cpu->VFP[VFP_FPINST] = cpu->Reg[rt];
+            else if (reg == 10)
+                cpu->VFP[VFP_FPINST2] = cpu->Reg[rt];
+            // any other selector falls through here and the write is dropped
+        }
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // outside the condition block, so it also runs when the condition fails
+    INC_PC(sizeof(vmsr_inst)); // NOTE(review): presumably steps past this record's payload - confirm against INC_PC
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRC register to scalar */
+/* cond 1110 0XX0 Vd-- Rt-- 1011 DXX1 0000 */
+/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmovbrc_inst {
+    unsigned int esize;
+    unsigned int index;
+    unsigned int d;
+    unsigned int t;
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrc)(unsigned int inst, int index)
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrc_inst));
+    vmovbrc_inst *inst_cream = (vmovbrc_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31);
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
+    inst_cream->t     = BITS(inst, 12, 15);
+    /* VFP variant of instruction */
+    inst_cream->esize = 32;
+    inst_cream->index = BIT(inst, 21);
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMOVBRC_INST: /* Execution handler: copies core register t into one ExtReg entry. */
+{
+    if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) { /* CondPassed is not called when the condition is AL. */
+        CHECK_VFP_ENABLED;
+
+        vmovbrc_inst* const inst_cream = (vmovbrc_inst*)inst_base->component;
+
+        cpu->ExtReg[(2 * inst_cream->d) + inst_cream->index] = cpu->Reg[inst_cream->t]; /* index selects the entry within the pair starting at 2*d. */
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); /* Outside the condition: the PC advances even when the instruction is skipped. */
+    INC_PC(sizeof(vmovbrc_inst)); /* NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere. */
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMRS */
+/* cond 1110 1111 CRn- Rt-- 1010 0001 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmrs_inst { /* Decoded operands of VMRS; filled in by INTERPRETER_TRANSLATE(vmrs). */
+    unsigned int reg; /* BITS(inst, 16, 19); selects which cpu->VFP entry the handler reads. */
+    unsigned int Rt;  /* BITS(inst, 12, 15); destination core register (15 is special-cased for FPSCR). */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmrs)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vmrs_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmrs_inst)); /* One buffer sized for header plus payload. */
+    vmrs_inst *inst_cream = (vmrs_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VMRS_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->reg = BITS(inst, 16, 19); /* System-register selector (CRn position in the encoding comment above). */
+    inst_cream->Rt  = BITS(inst, 12, 15);
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMRS_INST: /* Execution handler: reads a VFP system register into core register Rt, or into the NZCV flags. */
+{
+    if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+        /* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
+           and in privileged mode */
+        /* Exceptions must be checked, according to v7 ref manual */
+        CHECK_VFP_ENABLED;
+
+        vmrs_inst* const inst_cream = (vmrs_inst*)inst_base->component;
+
+        unsigned int reg = inst_cream->reg;
+        unsigned int rt  = inst_cream->Rt;
+
+        if (reg == 1) // FPSCR
+        {
+            if (rt != 15)
+            {
+                cpu->Reg[rt] = cpu->VFP[VFP_FPSCR];
+            }
+            else
+            {
+                cpu->NFlag = (cpu->VFP[VFP_FPSCR] >> 31) & 1; // Rt == 15: FPSCR bits 31..28 go to the N, Z, C, V flags instead of a register.
+                cpu->ZFlag = (cpu->VFP[VFP_FPSCR] >> 30) & 1;
+                cpu->CFlag = (cpu->VFP[VFP_FPSCR] >> 29) & 1;
+                cpu->VFlag = (cpu->VFP[VFP_FPSCR] >> 28) & 1;
+            }
+        }
+        else if (reg == 0) // FPSID
+        {
+            cpu->Reg[rt] = cpu->VFP[VFP_FPSID];
+        }
+        else if (reg == 6) // MVFR1
+        {
+            cpu->Reg[rt] = cpu->VFP[VFP_MVFR1];
+        }
+        else if (reg == 7) // MVFR0
+        {
+            cpu->Reg[rt] = cpu->VFP[VFP_MVFR0];
+        }
+        else if (cpu->InAPrivilegedMode()) // Selectors 8, 9 and 10 are only honoured in a privileged mode; otherwise nothing is written.
+        {
+            if (reg == 8)
+                cpu->Reg[rt] = cpu->VFP[VFP_FPEXC];
+            else if (reg == 9)
+                cpu->Reg[rt] = cpu->VFP[VFP_FPINST];
+            else if (reg == 10)
+                cpu->Reg[rt] = cpu->VFP[VFP_FPINST2];
+        }
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // Outside the condition: the PC advances even when the instruction is skipped.
+    INC_PC(sizeof(vmrs_inst)); // NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere.
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBCR scalar to register */
+/* cond 1110 XXX1 Vd-- Rt-- 1011 NXX1 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MCR */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmovbcr_inst { /* Decoded operands of VMOVBCR; filled in by INTERPRETER_TRANSLATE(vmovbcr). */
+    unsigned int esize; /* The translator always stores 32 (VFP variant). */
+    unsigned int index; /* BIT(inst, 21); added to 2*d to pick the ExtReg entry that is read. */
+    unsigned int d;     /* BITS(inst, 16, 19) | BIT(inst, 7) << 4. */
+    unsigned int t;     /* BITS(inst, 12, 15); index of the cpu->Reg entry that is written. */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbcr)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vmovbcr_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbcr_inst)); /* One buffer sized for header plus payload. */
+    vmovbcr_inst *inst_cream = (vmovbcr_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VMOVBCR_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4; /* `<<` binds tighter than `|` (assuming BITS/BIT expand to parenthesised expressions). */
+    inst_cream->t     = BITS(inst, 12, 15);
+    /* VFP variant of instruction */
+    inst_cream->esize = 32;
+    inst_cream->index = BIT(inst, 21);
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMOVBCR_INST: /* Execution handler: copies one ExtReg entry into core register t. */
+{
+    if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vmovbcr_inst* const inst_cream = (vmovbcr_inst*) inst_base->component;
+
+        cpu->Reg[inst_cream->t] = cpu->ExtReg[(2 * inst_cream->d) + inst_cream->index]; /* index selects the entry within the pair starting at 2*d. */
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); /* Outside the condition: the PC advances even when the instruction is skipped. */
+    INC_PC(sizeof(vmovbcr_inst)); /* NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere. */
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* MRRC / MCRR instructions */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+/* cond 1100 0100 Rt2- Rt-- copr opc1 CRm- MCRR */
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRRSS between 2 registers and 2 singles */
+/* cond 1100 010X Rt2- Rt-- 1010 00X1 Vm-- */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmovbrrss_inst { /* Decoded operands of VMOVBRRSS; filled in by INTERPRETER_TRANSLATE(vmovbrrss). */
+    unsigned int to_arm; /* 1 when BIT(inst, 20) == 1, else 0; passed to VMOVBRRSS as the direction flag. */
+    unsigned int t;      /* BITS(inst, 12, 15); first core register index. */
+    unsigned int t2;     /* BITS(inst, 16, 19); second core register index. */
+    unsigned int m;      /* BITS(inst, 0, 3) << 1 | BIT(inst, 5). */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrrss)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vmovbrrss_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrrss_inst)); /* One buffer sized for header plus payload. */
+    vmovbrrss_inst *inst_cream = (vmovbrrss_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VMOVBRRSS_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->to_arm = BIT(inst, 20) == 1; /* Stored as 0 or 1. */
+    inst_cream->t      = BITS(inst, 12, 15);
+    inst_cream->t2     = BITS(inst, 16, 19);
+    inst_cream->m      = BITS(inst, 0, 3)<<1|BIT(inst, 5); /* Vm field shifted up one place with bit 5 as the low bit. */
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMOVBRRSS_INST: /* Execution handler: delegates the two-register transfer to the VMOVBRRSS helper. */
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vmovbrrss_inst* const inst_cream = (vmovbrrss_inst*)inst_base->component;
+
+        VMOVBRRSS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m, /* Helper is defined elsewhere; it receives both the register indices and pointers to the registers. */
+            &cpu->Reg[inst_cream->t], &cpu->Reg[inst_cream->t2]);
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); /* Outside the condition: the PC advances even when the instruction is skipped. */
+    INC_PC(sizeof(vmovbrrss_inst)); /* NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere. */
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRRD between 2 registers and 1 double */
+/* cond 1100 010X Rt2- Rt-- 1011 00X1 Vm-- */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vmovbrrd_inst { /* Decoded operands of VMOVBRRD; filled in by INTERPRETER_TRANSLATE(vmovbrrd). */
+    unsigned int to_arm; /* 1 when BIT(inst, 20) == 1, else 0; passed to VMOVBRRD as the direction flag. */
+    unsigned int t;      /* BITS(inst, 12, 15); first core register index. */
+    unsigned int t2;     /* BITS(inst, 16, 19); second core register index. */
+    unsigned int m;      /* BIT(inst, 5) << 4 | BITS(inst, 0, 3). */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrrd)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vmovbrrd_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrrd_inst)); /* One buffer sized for header plus payload. */
+    vmovbrrd_inst *inst_cream = (vmovbrrd_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VMOVBRRD_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->to_arm = BIT(inst, 20) == 1; /* Stored as 0 or 1. */
+    inst_cream->t      = BITS(inst, 12, 15);
+    inst_cream->t2     = BITS(inst, 16, 19);
+    inst_cream->m      = BIT(inst, 5)<<4 | BITS(inst, 0, 3); /* Bit 5 becomes bit 4 above the 4-bit Vm field (contrast with the single-precision layout in vmovbrrss). */
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VMOVBRRD_INST: /* Execution handler: delegates the two-register transfer to the VMOVBRRD helper. */
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vmovbrrd_inst *inst_cream = (vmovbrrd_inst *)inst_base->component;
+
+        VMOVBRRD(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m, /* Helper is defined elsewhere; it receives both the register indices and pointers to the registers. */
+            &(cpu->Reg[inst_cream->t]), &(cpu->Reg[inst_cream->t2]));
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); /* Outside the condition: the PC advances even when the instruction is skipped. */
+    INC_PC(sizeof(vmovbrrd_inst)); /* NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere. */
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* LDC / STC instructions */
+/* cond 110X XXX1 Rn-- CRd- copr imm- imm- LDC */
+/* cond 110X XXX0 Rn-- CRd- copr imm8 imm8 STC */
+
+/* ----------------------------------------------------------------------- */
+/* VSTR */
+/* cond 1101 UD00 Rn-- Vd-- 101X imm8 imm8 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vstr_inst { /* Decoded operands of VSTR; filled in by INTERPRETER_TRANSLATE(vstr). */
+    unsigned int single; /* 1 when BIT(inst, 8) == 0; the handler then stores one word instead of two. */
+    unsigned int n;      /* BITS(inst, 16, 19); base core register index. */
+    unsigned int d;      /* Source register number; bit layout depends on `single` (see translator). */
+    unsigned int imm32;  /* BITS(inst, 0, 7) << 2; byte offset magnitude. */
+    unsigned int add;    /* BIT(inst, 23); non-zero adds imm32 to the base, zero subtracts it. */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vstr)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vstr_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vstr_inst)); /* One buffer sized for header plus payload. */
+    vstr_inst *inst_cream = (vstr_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VSTR_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->single = BIT(inst, 8) == 0; /* Must be set before `d`, whose layout depends on it: Vd:D when single, D:Vd otherwise. */
+    inst_cream->add    = BIT(inst, 23);
+    inst_cream->imm32  = BITS(inst, 0,7) << 2;
+    inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->n      = BITS(inst, 16, 19);
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VSTR_INST: /* Execution handler: stores one ExtReg word (single) or an ExtReg word pair (double) to memory. */
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vstr_inst *inst_cream = (vstr_inst *)inst_base->component;
+
+        unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]); /* PC base: low two bits cleared, then 8 added. */
+        addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32); /* `addr` is declared outside this handler. */
+
+        if (inst_cream->single)
+        {
+            cpu->WriteMemory32(addr, cpu->ExtReg[inst_cream->d]);
+        }
+        else
+        {
+            const u32 word1 = cpu->ExtReg[inst_cream->d*2+0]; /* A double occupies ExtReg entries 2*d and 2*d+1. */
+            const u32 word2 = cpu->ExtReg[inst_cream->d*2+1];
+
+            if (cpu->InBigEndianMode()) { /* The two words are written in swapped order in big-endian mode. */
+                cpu->WriteMemory32(addr + 0, word2);
+                cpu->WriteMemory32(addr + 4, word1);
+            } else {
+                cpu->WriteMemory32(addr + 0, word1);
+                cpu->WriteMemory32(addr + 4, word2);
+            }
+        }
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); /* Outside the condition: the PC advances even when the instruction is skipped. */
+    INC_PC(sizeof(vstr_inst)); /* NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere. */
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VPUSH */
+/* cond 1101 0D10 1101 Vd-- 101X imm8 imm8 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vpush_inst { /* Decoded operands of VPUSH; filled in by INTERPRETER_TRANSLATE(vpush). */
+    unsigned int single; /* 1 when BIT(inst, 8) == 0; selects one-word transfers instead of two-word transfers. */
+    unsigned int d;      /* First register number; bit layout depends on `single` (see translator). */
+    unsigned int imm32;  /* BITS(inst, 0, 7) << 2; number of bytes the stack pointer is lowered by. */
+    unsigned int regs;   /* Register count: BITS(inst, 0, 7) when single, BITS(inst, 1, 7) otherwise. */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vpush)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vpush_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vpush_inst)); /* One buffer sized for header plus payload. */
+    vpush_inst *inst_cream = (vpush_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VPUSH_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->single  = BIT(inst, 8) == 0; /* Must be set first: both `d` and `regs` below depend on it. */
+    inst_cream->d       = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->imm32   = BITS(inst, 0, 7)<<2;
+    inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7)); /* Doubles drop bit 0 of imm8, i.e. the count is halved. */
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VPUSH_INST: /* Execution handler: stores `regs` consecutive registers below the stack pointer, then lowers R13. */
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vpush_inst *inst_cream = (vpush_inst *)inst_base->component;
+
+        addr = cpu->Reg[R13] - inst_cream->imm32; /* Stores start imm32 bytes below the current R13 and proceed upward; `addr` is declared outside this handler. */
+
+        for (unsigned int i = 0; i < inst_cream->regs; i++)
+        {
+            if (inst_cream->single)
+            {
+                cpu->WriteMemory32(addr, cpu->ExtReg[inst_cream->d+i]);
+                addr += 4;
+            }
+            else
+            {
+                const u32 word1 = cpu->ExtReg[(inst_cream->d+i)*2+0]; /* Double number d+i occupies ExtReg entries 2*(d+i) and 2*(d+i)+1. */
+                const u32 word2 = cpu->ExtReg[(inst_cream->d+i)*2+1];
+
+                if (cpu->InBigEndianMode()) { /* The two words are written in swapped order in big-endian mode. */
+                    cpu->WriteMemory32(addr + 0, word2);
+                    cpu->WriteMemory32(addr + 4, word1);
+                } else {
+                    cpu->WriteMemory32(addr + 0, word1);
+                    cpu->WriteMemory32(addr + 4, word2);
+                }
+
+                addr += 8;
+            }
+        }
+
+        cpu->Reg[R13] -= inst_cream->imm32; /* R13 is updated only after all stores have been issued. */
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); /* Outside the condition: the PC advances even when the instruction is skipped. */
+    INC_PC(sizeof(vpush_inst)); /* NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere. */
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VSTM */
+/* cond 110P UDW0 Rn-- Vd-- 101X imm8 imm8 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vstm_inst { /* Decoded operands of VSTM; filled in by INTERPRETER_TRANSLATE(vstm). */
+    unsigned int single; /* 1 when BIT(inst, 8) == 0; selects one-word transfers instead of two-word transfers. */
+    unsigned int add;    /* BIT(inst, 23); zero makes the handler start imm32 bytes below the base. */
+    unsigned int wback;  /* BIT(inst, 21); non-zero writes base +/- imm32 back to register n. */
+    unsigned int d;      /* First register number; bit layout depends on `single` (see translator). */
+    unsigned int n;      /* BITS(inst, 16, 19); base core register index. */
+    unsigned int imm32;  /* BITS(inst, 0, 7) << 2; byte size used for the start offset and the write-back. */
+    unsigned int regs;   /* Register count: BITS(inst, 0, 7) when single, BITS(inst, 1, 7) otherwise. */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vstm)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vstm_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vstm_inst)); /* One buffer sized for header plus payload. */
+    vstm_inst *inst_cream = (vstm_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VSTM_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->single = BIT(inst, 8) == 0; /* Must be set first: both `d` and `regs` below depend on it. */
+    inst_cream->add    = BIT(inst, 23);
+    inst_cream->wback  = BIT(inst, 21);
+    inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->n      = BITS(inst, 16, 19);
+    inst_cream->imm32  = BITS(inst, 0, 7)<<2;
+    inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7)); /* Doubles drop bit 0 of imm8, i.e. the count is halved. */
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VSTM_INST: /* encoding 1 */ /* Execution handler: stores `regs` consecutive registers starting at the base address. */
+{
+    if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vstm_inst* inst_cream = (vstm_inst*)inst_base->component;
+
+        u32 address = cpu->Reg[inst_cream->n];
+
+        // Only possible in ARM mode, where PC accesses have an 8 byte offset.
+        if (inst_cream->n == 15)
+            address += 8;
+
+        if (inst_cream->add == 0) // Decrementing form: start imm32 bytes below the base and store upward.
+            address -= inst_cream->imm32;
+
+        for (unsigned int i = 0; i < inst_cream->regs; i++)
+        {
+            if (inst_cream->single)
+            {
+                cpu->WriteMemory32(address, cpu->ExtReg[inst_cream->d+i]);
+                address += 4;
+            }
+            else
+            {
+                const u32 word1 = cpu->ExtReg[(inst_cream->d+i)*2+0]; // Double number d+i occupies ExtReg entries 2*(d+i) and 2*(d+i)+1.
+                const u32 word2 = cpu->ExtReg[(inst_cream->d+i)*2+1];
+
+                if (cpu->InBigEndianMode()) { // The two words are written in swapped order in big-endian mode.
+                    cpu->WriteMemory32(address + 0, word2);
+                    cpu->WriteMemory32(address + 4, word1);
+                } else {
+                    cpu->WriteMemory32(address + 0, word1);
+                    cpu->WriteMemory32(address + 4, word2);
+                }
+
+                address += 8;
+            }
+        }
+        if (inst_cream->wback) { // Write-back uses the original base +/- imm32, not the running `address`.
+            cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 :
+                cpu->Reg[inst_cream->n] - inst_cream->imm32);
+        }
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // Same PC step as the other VFP handlers, instead of a hard-coded 4.
+    INC_PC(sizeof(vstm_inst)); // NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere.
+
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VPOP */
+/* cond 1100 1D11 1101 Vd-- 101X imm8 imm8 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vpop_inst { /* Decoded operands of VPOP; filled in by INTERPRETER_TRANSLATE(vpop). */
+    unsigned int single; /* 1 when BIT(inst, 8) == 0; selects one-word transfers instead of two-word transfers. */
+    unsigned int d;      /* First register number; bit layout depends on `single` (see translator). */
+    unsigned int imm32;  /* BITS(inst, 0, 7) << 2; number of bytes the stack pointer is raised by. */
+    unsigned int regs;   /* Register count: BITS(inst, 0, 7) when single, BITS(inst, 1, 7) otherwise. */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vpop)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vpop_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vpop_inst)); /* One buffer sized for header plus payload. */
+    vpop_inst *inst_cream = (vpop_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VPOP_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->single  = BIT(inst, 8) == 0; /* Must be set first: both `d` and `regs` below depend on it. */
+    inst_cream->d       = (inst_cream->single ? (BITS(inst, 12, 15)<<1)|BIT(inst, 22) : BITS(inst, 12, 15)|(BIT(inst, 22)<<4));
+    inst_cream->imm32   = BITS(inst, 0, 7)<<2;
+    inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7)); /* Doubles drop bit 0 of imm8, i.e. the count is halved. */
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VPOP_INST: /* Execution handler: loads `regs` consecutive registers from the stack pointer upward, then raises R13. */
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vpop_inst *inst_cream = (vpop_inst *)inst_base->component;
+
+        addr = cpu->Reg[R13]; /* `addr` is declared outside this handler. */
+
+        for (unsigned int i = 0; i < inst_cream->regs; i++)
+        {
+            if (inst_cream->single)
+            {
+                cpu->ExtReg[inst_cream->d+i] = cpu->ReadMemory32(addr);
+                addr += 4;
+            }
+            else
+            {
+                const u32 word1 = cpu->ReadMemory32(addr + 0);
+                const u32 word2 = cpu->ReadMemory32(addr + 4);
+
+                if (cpu->InBigEndianMode()) { /* The two words are stored in swapped order in big-endian mode. */
+                    cpu->ExtReg[(inst_cream->d+i)*2+0] = word2;
+                    cpu->ExtReg[(inst_cream->d+i)*2+1] = word1;
+                } else {
+                    cpu->ExtReg[(inst_cream->d+i)*2+0] = word1; /* Double number d+i occupies ExtReg entries 2*(d+i) and 2*(d+i)+1. */
+                    cpu->ExtReg[(inst_cream->d+i)*2+1] = word2;
+                }
+
+                addr += 8;
+            }
+        }
+        cpu->Reg[R13] += inst_cream->imm32; /* R13 is updated only after all loads have been issued. */
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); /* Outside the condition: the PC advances even when the instruction is skipped. */
+    INC_PC(sizeof(vpop_inst)); /* NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere. */
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+
+/* ----------------------------------------------------------------------- */
+/* VLDR */
+/* cond 1101 UD01 Rn-- Vd-- 101X imm8 imm8 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vldr_inst { /* Decoded operands of VLDR; filled in by INTERPRETER_TRANSLATE(vldr). */
+    unsigned int single; /* 1 when BIT(inst, 8) == 0; the handler then loads one word instead of two. */
+    unsigned int n;      /* BITS(inst, 16, 19); base core register index. */
+    unsigned int d;      /* Destination register number; bit layout depends on `single` (see translator). */
+    unsigned int imm32;  /* BITS(inst, 0, 7) << 2; byte offset magnitude. */
+    unsigned int add;    /* BIT(inst, 23); non-zero adds imm32 to the base, zero subtracts it. */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vldr)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vldr_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vldr_inst)); /* One buffer sized for header plus payload. */
+    vldr_inst *inst_cream = (vldr_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VLDR_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->single = BIT(inst, 8) == 0; /* Must be set before `d`, whose layout depends on it: Vd:D when single, D:Vd otherwise. */
+    inst_cream->add    = BIT(inst, 23);
+    inst_cream->imm32  = BITS(inst, 0,7) << 2;
+    inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->n      = BITS(inst, 16, 19);
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VLDR_INST: /* Execution handler: loads one ExtReg word (single) or an ExtReg word pair (double) from memory. */
+{
+    if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vldr_inst *inst_cream = (vldr_inst *)inst_base->component;
+
+        unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]); /* PC base: low two bits cleared, then 8 added. */
+        addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32); /* `addr` is declared outside this handler. */
+
+        if (inst_cream->single)
+        {
+            cpu->ExtReg[inst_cream->d] = cpu->ReadMemory32(addr);
+        }
+        else
+        {
+            const u32 word1 = cpu->ReadMemory32(addr + 0);
+            const u32 word2 = cpu->ReadMemory32(addr + 4);
+
+            if (cpu->InBigEndianMode()) { /* The two words are stored in swapped order in big-endian mode. */
+                cpu->ExtReg[inst_cream->d*2+0] = word2;
+                cpu->ExtReg[inst_cream->d*2+1] = word1;
+            } else {
+                cpu->ExtReg[inst_cream->d*2+0] = word1; /* A double occupies ExtReg entries 2*d and 2*d+1. */
+                cpu->ExtReg[inst_cream->d*2+1] = word2;
+            }
+        }
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); /* Outside the condition: the PC advances even when the instruction is skipped. */
+    INC_PC(sizeof(vldr_inst)); /* NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere. */
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* VLDM */
+/* cond 110P UDW1 Rn-- Vd-- 101X imm8 imm8 */
+#ifdef VFP_INTERPRETER_STRUCT
+struct vldm_inst { /* Decoded operands of VLDM; filled in by INTERPRETER_TRANSLATE(vldm). */
+    unsigned int single; /* 1 when BIT(inst, 8) == 0; selects one-word transfers instead of two-word transfers. */
+    unsigned int add;    /* BIT(inst, 23); zero makes the handler start imm32 bytes below the base. */
+    unsigned int wback;  /* BIT(inst, 21); non-zero writes base +/- imm32 back to register n. */
+    unsigned int d;      /* First register number; bit layout depends on `single` (see translator). */
+    unsigned int n;      /* BITS(inst, 16, 19); base core register index. */
+    unsigned int imm32;  /* BITS(inst, 0, 7) << 2; byte size used for the start offset and the write-back. */
+    unsigned int regs;   /* Register count: BITS(inst, 0, 7) when single, BITS(inst, 1, 7) otherwise. */
+};
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+static ARM_INST_PTR INTERPRETER_TRANSLATE(vldm)(unsigned int inst, int index) /* Decodes the raw word `inst` into an arm_inst header followed by a vldm_inst payload. */
+{
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vldm_inst)); /* One buffer sized for header plus payload. */
+    vldm_inst *inst_cream = (vldm_inst *)inst_base->component;
+
+    inst_base->cond = BITS(inst, 28, 31); /* Condition field; tested by the VLDM_INST handler. */
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
+
+    inst_cream->single = BIT(inst, 8) == 0; /* Must be set first: both `d` and `regs` below depend on it. */
+    inst_cream->add    = BIT(inst, 23);
+    inst_cream->wback  = BIT(inst, 21);
+    inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->n      = BITS(inst, 16, 19);
+    inst_cream->imm32  = BITS(inst, 0, 7)<<2;
+    inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7)); /* Doubles drop bit 0 of imm8, i.e. the count is halved. */
+
+    return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VLDM_INST: // Execution handler: loads `regs` consecutive registers starting at the base address.
+{
+    if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vldm_inst* inst_cream = (vldm_inst*)inst_base->component;
+
+        u32 address = cpu->Reg[inst_cream->n];
+
+        // Only possible in ARM mode, where PC accesses have an 8 byte offset.
+        if (inst_cream->n == 15)
+            address += 8;
+
+        if (inst_cream->add == 0) // Decrementing form: start imm32 bytes below the base and load upward.
+            address -= inst_cream->imm32;
+
+        for (unsigned int i = 0; i < inst_cream->regs; i++)
+        {
+            if (inst_cream->single)
+            {
+                cpu->ExtReg[inst_cream->d+i] = cpu->ReadMemory32(address);
+                address += 4;
+            }
+            else
+            {
+                const u32 word1 = cpu->ReadMemory32(address + 0);
+                const u32 word2 = cpu->ReadMemory32(address + 4);
+
+                if (cpu->InBigEndianMode()) { // The two words are stored in swapped order in big-endian mode.
+                    cpu->ExtReg[(inst_cream->d+i)*2+0] = word2;
+                    cpu->ExtReg[(inst_cream->d+i)*2+1] = word1;
+                } else {
+                    cpu->ExtReg[(inst_cream->d+i)*2+0] = word1; // Double number d+i occupies ExtReg entries 2*(d+i) and 2*(d+i)+1.
+                    cpu->ExtReg[(inst_cream->d+i)*2+1] = word2;
+                }
+
+                address += 8;
+            }
+        }
+        if (inst_cream->wback) { // Write-back uses the original base +/- imm32, not the running `address`.
+            cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 :
+                cpu->Reg[inst_cream->n] - inst_cream->imm32);
+        }
+    }
+    cpu->Reg[15] += cpu->GetInstructionSize(); // Outside the condition: the PC advances even when the instruction is skipped.
+    INC_PC(sizeof(vldm_inst)); // NOTE(review): presumably steps the translated-instruction cursor past this record; macro defined elsewhere.
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
diff --git a/tests/skyeye_interpreter/skyeye_common/vfp/vfpsingle.cpp b/tests/skyeye_interpreter/skyeye_common/vfp/vfpsingle.cpp
new file mode 100644
index 00000000..8b85b8da
--- /dev/null
+++ b/tests/skyeye_interpreter/skyeye_common/vfp/vfpsingle.cpp
@@ -0,0 +1,1287 @@
+/*
+    vfp/vfpsingle.c - ARM VFPv3 emulation unit - SoftFloat single instruction
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/*
+ * This code is derived in part from :
+ * - Android kernel
+ * - John R. Housers softfloat library, which
+ * carries the following notice:
+ *
+ * ===========================================================================
+ * This C source file is part of the SoftFloat IEC/IEEE Floating-point
+ * Arithmetic Package, Release 2.
+ *
+ * Written by John R. Hauser.  This work was made possible in part by the
+ * International Computer Science Institute, located at Suite 600, 1947 Center
+ * Street, Berkeley, California 94704.  Funding was partially provided by the
+ * National Science Foundation under grant MIP-9311980.  The original version
+ * of this code was written as part of a project to build a fixed-point vector
+ * processor in collaboration with the University of California at Berkeley,
+ * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+ * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+ * arithmetic/softfloat.html'.
+ *
+ * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+ * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+ * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+ * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+ * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+ *
+ * Derivative works are acceptable, even for commercial purposes, so long as
+ * (1) they include prominent notice that the work is derivative, and (2) they
+ * include prominent notice akin to these three paragraphs for those parts of
+ * this code that are retained.
+ * ===========================================================================
+ */
+
+#include <algorithm>
+#include <cinttypes>
+
+#include "common/common_types.h"
+#include "common/logging/log.h"
+
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfp_helper.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/asm_vfp.h"
+#include "tests/skyeye_interpreter/skyeye_common/vfp/vfp.h"
+
+static struct vfp_single vfp_single_default_qnan = { /* Canned quiet NaN returned in default-NaN mode and for invalid sqrt. */
+    255, /* NOTE(review): positional initialiser; presumably the exponent member - confirm against struct vfp_single. */
+    0,   /* NOTE(review): presumably the sign member - confirm. */
+    VFP_SINGLE_SIGNIFICAND_QNAN, /* NOTE(review): presumably the significand member - confirm. */
+};
+
+static void vfp_single_dump(const char *str, struct vfp_single *s) /* Trace-logs the three fields of *s, prefixed with the label `str`. */
+{
+    LOG_TRACE(Core_ARM11, "%s: sign=%d exponent=%d significand=%08x",
+              str, s->sign != 0, s->exponent, s->significand); /* The sign is logged as 0 or 1 rather than its raw value. */
+}
+
+static void vfp_single_normalise_denormal(struct vfp_single *vs) /* Shifts the significand of *vs left in place and lowers the exponent to compensate. */
+{
+    int bits = 31 - fls(vs->significand); /* NOTE(review): assumes fls() returns the 1-based index of the highest set bit, so the shift moves it to bit 30 - confirm. */
+
+    vfp_single_dump("normalise_denormal: in", vs);
+
+    if (bits) {
+        vs->exponent -= bits - 1; /* The exponent drops by one less than the shift amount. */
+        vs->significand <<= bits;
+    }
+
+    vfp_single_dump("normalise_denormal: out", vs);
+}
+
+
+u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) /* Normalises and rounds *vs per the fpscr rounding mode, packs it, writes it to register sd via vfp_put_float, and returns the updated exception flags; `func` is only used in the trace log. */
+{
+    u32 significand, incr, rmode;
+    int exponent, shift, underflow;
+
+    vfp_single_dump("pack: in", vs);
+
+    /*
+     * Infinities and NaNs are a special case.
+     */
+    if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) /* Exponent 255 with a non-zero significand is only packed as-is when `exceptions` is already non-zero. */
+        goto pack;
+
+    /*
+     * Special-case zero.
+     */
+    if (vs->significand == 0) {
+        vs->exponent = 0;
+        goto pack;
+    }
+
+    exponent = vs->exponent;
+    significand = vs->significand;
+
+    /*
+     * Normalise first.  Note that we shift the significand up to
+     * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
+     * significant bit.
+     */
+    shift = 32 - fls(significand); /* NOTE(review): assumes fls() returns the 1-based index of the highest set bit - confirm. */
+    if (shift < 32 && shift) {
+        exponent -= shift;
+        significand <<= shift;
+    }
+
+#if 1
+    vs->exponent = exponent;
+    vs->significand = significand;
+    vfp_single_dump("pack: normalised", vs);
+#endif
+
+    /*
+     * Tiny number?
+     */
+    underflow = exponent < 0;
+    if (underflow) {
+        significand = vfp_shiftright32jamming(significand, -exponent); /* Helper defined elsewhere; NOTE(review): presumably a right shift that keeps lost bits sticky - confirm. */
+        exponent = 0;
+#if 1
+        vs->exponent = exponent;
+        vs->significand = significand;
+        vfp_single_dump("pack: tiny number", vs);
+#endif
+        if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) /* No low bits set after the shift: the tiny result is exact, so no underflow is flagged. */
+            underflow = 0;
+    }
+
+    /*
+     * Select rounding increment.
+     */
+    incr = 0;
+    rmode = fpscr & FPSCR_RMODE_MASK;
+
+    if (rmode == FPSCR_ROUND_NEAREST) {
+        incr = 1 << VFP_SINGLE_LOW_BITS;
+        if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) /* Result LSB is even: use one less than half so exact ties do not round up. */
+            incr -= 1;
+    } else if (rmode == FPSCR_ROUND_TOZERO) {
+        incr = 0;
+    } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) /* Directed rounding that moves the magnitude away from zero for this sign. */
+        incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
+
+    LOG_TRACE(Core_ARM11, "rounding increment = 0x%08x", incr);
+
+    /*
+     * Is our rounding going to overflow?
+     */
+    if ((significand + incr) < significand) { /* Unsigned wrap-around means the addition would carry out of bit 31. */
+        exponent += 1;
+        significand = (significand >> 1) | (significand & 1); /* The bit shifted out is ORed back into bit 0 so it still counts as inexact. */
+        incr >>= 1;
+#if 1
+        vs->exponent = exponent;
+        vs->significand = significand;
+        vfp_single_dump("pack: overflow", vs);
+#endif
+    }
+
+    /*
+     * If any of the low bits (which will be shifted out of the
+     * number) are non-zero, the result is inexact.
+     */
+    if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
+        exceptions |= FPSCR_IXC;
+
+    /*
+     * Do our rounding.
+     */
+    significand += incr;
+
+    /*
+     * Infinity?
+     */
+    if (exponent >= 254) {
+        exceptions |= FPSCR_OFC | FPSCR_IXC;
+        if (incr == 0) { /* No rounding increment selected: store exponent 253 with an all-ones significand instead of infinity. */
+            vs->exponent = 253;
+            vs->significand = 0x7fffffff;
+        } else {
+            vs->exponent = 255;		/* infinity */
+            vs->significand = 0;
+        }
+    } else {
+        if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
+            exponent = 0;
+        if (exponent || significand > 0x80000000) /* Underflow is only reported when the rounded result has exponent 0 and a significand of at most 0x80000000. */
+            underflow = 0;
+        if (underflow)
+            exceptions |= FPSCR_UFC;
+        vs->exponent = exponent;
+        vs->significand = significand >> 1;
+    }
+
+pack:
+    vfp_single_dump("pack: final", vs);
+    {
+        s32 d = vfp_single_pack(vs);
+        LOG_TRACE(Core_ARM11, "%s: d(s%d)=%08x exceptions=%08x", func,
+                  sd, d, exceptions);
+        vfp_put_float(state, d, sd); /* The packed value is written to register sd on every path. */
+    }
+
+    return exceptions;
+}
+
+/*
+ * Propagate the NaN, setting exceptions if it is signalling.
+ * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
+ */
+static u32
+vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
+                  struct vfp_single *vsm, u32 fpscr) /* vsm may be null (single-operand callers); it is only classified when non-null. */
+{
+    struct vfp_single *nan;
+    int tn, tm = 0;
+
+    tn = vfp_single_type(vsn);
+
+    if (vsm)
+        tm = vfp_single_type(vsm);
+
+    if (fpscr & FPSCR_DEFAULT_NAN)
+        /*
+         * Default NaN mode - always returns a quiet NaN
+         */
+        nan = &vfp_single_default_qnan;
+    else {
+        /*
+         * Contemporary mode - select the first signalling
+         * NAN, or if neither are signalling, the first
+         * quiet NAN.
+         */
+        if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
+            nan = vsn;
+        else
+            nan = vsm;
+        /*
+         * Make the NaN quiet.
+         */
+        nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; /* Modifies the chosen input operand (*vsn or *vsm) in place before it is copied. */
+    }
+
+    *vsd = *nan;
+
+    /*
+     * If one was a signalling NAN, raise invalid operation.
+     */
+    return tn == VFP_SNAN || tm == VFP_SNAN ? (u32)FPSCR_IOC : (u32)VFP_NAN_FLAG; /* Otherwise VFP_NAN_FLAG is returned. */
+}
+
+
+/*
+ * Extended operations
+ */
+static u32 vfp_single_fabs(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) /* Writes vfp_single_packed_abs(m) to register sd; `unused` and `fpscr` are not read. */
+{
+    vfp_put_float(state, vfp_single_packed_abs(m), sd); /* Operates on the packed value directly, without unpacking. */
+    return 0; /* Always returns 0. */
+}
+
+static u32 vfp_single_fcpy(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) /* Writes the packed value m unchanged to register sd; `unused` and `fpscr` are not read. */
+{
+    vfp_put_float(state, m, sd);
+    return 0; /* Always returns 0. */
+}
+
+static u32 vfp_single_fneg(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) /* Writes vfp_single_packed_negate(m) to register sd; `unused` and `fpscr` are not read. */
+{
+    vfp_put_float(state, vfp_single_packed_negate(m), sd); /* Operates on the packed value directly, without unpacking. */
+    return 0; /* Always returns 0. */
+}
+
+/*
+ * 16-entry correction table used by vfp_estimate_sqrt_significand() when the
+ * exponent is odd.  The entry, selected by a 4-bit index taken from the
+ * significand, is subtracted from the initial estimate.
+ */
+static const u16 sqrt_oddadjust[] = {
+    0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
+    0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
+};
+
+/*
+ * 16-entry correction table used by vfp_estimate_sqrt_significand() when the
+ * exponent is even.  The entry, selected by a 4-bit index taken from the
+ * significand, is subtracted from the initial estimate.
+ */
+static const u16 sqrt_evenadjust[] = {
+    0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
+    0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
+};
+
+/*
+ * Produce an estimate of the significand of a square root.
+ *
+ * exponent:    only its low bit is examined; it selects the odd or the even
+ *              adjustment table and the matching refinement formula.
+ * significand: expected to have its top two bits equal to 01; any other
+ *              pattern is only reported via a trace message and the
+ *              computation proceeds regardless.
+ *
+ * Returns the estimated significand.
+ */
+u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
+{
+    int index;
+    u32 z, a;
+
+    /* Diagnostic only - an unexpected significand does not abort. */
+    if ((significand & 0xc0000000) != 0x40000000) {
+        LOG_TRACE(Core_ARM11, "invalid significand");
+    }
+
+    a = significand << 1;
+    /* Bits 27..30 of 'a' pick one of the 16 table entries. */
+    index = (a >> 27) & 15;
+    if (exponent & 1) {
+        /* Odd exponent: table-corrected first guess, then one refinement. */
+        z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
+        z = ((a / z) << 14) + (z << 15);
+        a >>= 1;
+    } else {
+        /* Even exponent: as above, but the guess is clamped on overflow. */
+        z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
+        z = a / z + z;
+        z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
+        /* Early exit: the halved input is returned when z does not exceed it. */
+        if (z <= a)
+            return (s32)a >> 1;
+    }
+    {
+        /*
+         * Final step in 64-bit arithmetic.  do_div() presumably divides 'v'
+         * by 'z' in place (the quotient is read back from 'v' below).
+         */
+        u64 v = (u64)a << 31;
+        do_div(v, z);
+        return (u32)(v + (z >> 1));
+    }
+}
+
+/*
+ * FSQRT: compute the square root of packed operand 'm' and store it in
+ * register 'sd'.
+ *
+ * Returns the exception flags raised: FPSCR_IOC for a negative operand,
+ * whatever vfp_propagate_nan() reports for a NaN, 0 for +infinity and
+ * zeroes, otherwise the result of vfp_single_normaliseround().
+ *
+ * Note that the labels sqrt_copy and sqrt_invalid live inside the special
+ * value branch and are also jumped to from the code further down.
+ */
+static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    struct vfp_single vsm, vsd, *vsp;
+    int ret, tm;
+
+    vfp_single_unpack(&vsm, m, &fpscr);
+    tm = vfp_single_type(&vsm);
+    if (tm & (VFP_NAN|VFP_INFINITY)) {
+        vsp = &vsd;
+
+        if (tm & VFP_NAN)
+            ret = vfp_propagate_nan(vsp, &vsm, nullptr, fpscr);
+        else if (vsm.sign == 0) {
+            /* +infinity (and, via goto, zero): the operand is the result. */
+sqrt_copy:
+            vsp = &vsm;
+            ret = 0;
+        } else {
+            /* Negative operand: default quiet NaN plus invalid operation. */
+sqrt_invalid:
+            vsp = &vfp_single_default_qnan;
+            ret = FPSCR_IOC;
+        }
+        vfp_put_float(state, vfp_single_pack(vsp), sd);
+        return ret;
+    }
+
+    /*
+     * sqrt(+/- 0) == +/- 0
+     */
+    if (tm & VFP_ZERO)
+        goto sqrt_copy;
+
+    /*
+     * Normalise a denormalised number
+     */
+    if (tm & VFP_DENORMAL)
+        vfp_single_normalise_denormal(&vsm);
+
+    /*
+     * sqrt(<0) = invalid
+     */
+    if (vsm.sign)
+        goto sqrt_invalid;
+
+    vfp_single_dump("sqrt", &vsm);
+
+    /*
+     * Estimate the square root: halve the unbiased exponent and take a
+     * first guess at the significand.
+     */
+    vsd.sign = 0;
+    vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
+    vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
+
+    vfp_single_dump("sqrt estimate", &vsd);
+
+    /*
+     * And now adjust.  The correction below only runs when the low bits of
+     * the estimate are small (<= 5).
+     */
+    if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
+        if (vsd.significand < 2) {
+            /* The "+ 2" above wrapped around; saturate instead. */
+            vsd.significand = 0xffffffff;
+        } else {
+            u64 term;
+            s64 rem;
+            /* Even exponent: double the operand significand first. */
+            vsm.significand <<= static_cast<u32>((vsm.exponent & 1) == 0);
+            term = (u64)vsd.significand * vsd.significand;
+            rem = ((u64)vsm.significand << 32) - term;
+
+            LOG_TRACE(Core_ARM11, "term=%016" PRIx64 "rem=%016" PRIx64, term, rem);
+
+            /* Step the estimate down until its square no longer exceeds the operand. */
+            while (rem < 0) {
+                vsd.significand -= 1;
+                rem += ((u64)vsd.significand << 1) | 1;
+            }
+            /* Record a non-zero remainder in the lowest bit. */
+            vsd.significand |= rem != 0;
+        }
+    }
+    vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
+
+    return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fsqrt");
+}
+
+/*
+ * Compare the packed float held in register 'sd' (called 'd' below) with
+ * the packed operand 'm' and return the resulting FPSCR flag bits:
+ *
+ *   Equal        := Z and C
+ *   Less than    := N
+ *   Greater than := C
+ *   Unordered    := C and V
+ *
+ * If either value is a NaN, FPSCR_IOC is added to the result when that NaN
+ * is signalling, or for any NaN when 'signal_on_qnan' is non-zero.
+ * The 'fpscr' argument is not examined.
+ */
+static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u32 fpscr)
+{
+    s32 d;
+    u32 ret = 0;
+
+    d = vfp_get_float(state, sd);
+    /* Exponent 255 with a non-zero mantissa: 'm' is a NaN. */
+    if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
+        ret |= FPSCR_CFLAG | FPSCR_VFLAG;
+        if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
+            /*
+             * Signalling NaN, or signalling on quiet NaN
+             */
+            ret |= FPSCR_IOC;
+    }
+
+    /* Same test for 'd'. */
+    if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
+        ret |= FPSCR_CFLAG | FPSCR_VFLAG;
+        if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
+            /*
+             * Signalling NaN, or signalling on quiet NaN
+             */
+            ret |= FPSCR_IOC;
+    }
+
+    /* Ordered comparison only when neither operand was a NaN. */
+    if (ret == 0) {
+        if (d == m || vfp_single_packed_abs(d | m) == 0) {
+            /*
+             * equal (identical bit patterns, or both are zeroes of
+             * either sign)
+             */
+            ret |= FPSCR_ZFLAG | FPSCR_CFLAG;
+        } else if (vfp_single_packed_sign(d ^ m)) {
+            /*
+             * different signs
+             */
+            if (vfp_single_packed_sign(d))
+                /*
+                 * d is negative, so d < m
+                 */
+                ret |= FPSCR_NFLAG;
+            else
+                /*
+                 * d is positive, so d > m
+                 */
+                ret |= FPSCR_CFLAG;
+        } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
+            /*
+             * d < m (same sign: the integer comparison of the packed
+             * values is inverted when both are negative)
+             */
+            ret |= FPSCR_NFLAG;
+        } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
+            /*
+             * d > m
+             */
+            ret |= FPSCR_CFLAG;
+        }
+    }
+    return ret;
+}
+
+/* FCMP: compare register 'sd' with 'm'; quiet NaNs do not raise IOC. */
+static u32 vfp_single_fcmp(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    const int signal_on_qnan = 0;
+
+    return vfp_compare(state, sd, signal_on_qnan, m, fpscr);
+}
+
+/* FCMPE: compare register 'sd' with 'm'; any NaN raises IOC. */
+static u32 vfp_single_fcmpe(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    const int signal_on_qnan = 1;
+
+    return vfp_compare(state, sd, signal_on_qnan, m, fpscr);
+}
+
+/* FCMPZ: compare register 'sd' with +0.0 ('m' is ignored); quiet NaNs do not raise IOC. */
+static u32 vfp_single_fcmpz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    const int signal_on_qnan = 0;
+    const s32 packed_zero = 0;
+
+    return vfp_compare(state, sd, signal_on_qnan, packed_zero, fpscr);
+}
+
+/* FCMPEZ: compare register 'sd' with +0.0 ('m' is ignored); any NaN raises IOC. */
+static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    const int signal_on_qnan = 1;
+    const s32 packed_zero = 0;
+
+    return vfp_compare(state, sd, signal_on_qnan, packed_zero, fpscr);
+}
+
+/*
+ * FCVTD: widen the packed single 'm' to double precision and store it in
+ * double register 'dd'.
+ *
+ * Returns FPSCR_IOC when the source is a signalling NaN; for ordinary
+ * numbers the value returned by vfp_double_normaliseround() is passed on.
+ */
+static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr)
+{
+    struct vfp_single src;
+    struct vfp_double dst;
+
+    vfp_single_unpack(&src, m, &fpscr);
+
+    const int type = vfp_single_type(&src);
+
+    /* Only a signalling NaN counts as an invalid operation here. */
+    const u32 exceptions = (type == VFP_SNAN) ? (u32)FPSCR_IOC : 0u;
+
+    if (type & VFP_DENORMAL)
+        vfp_single_normalise_denormal(&src);
+
+    dst.sign = src.sign;
+    dst.significand = (u64)src.significand << 32;
+
+    /*
+     * Infinities and NaNs take the all-ones double exponent and are
+     * packed directly, bypassing normalisation and rounding.
+     */
+    if (type & (VFP_INFINITY|VFP_NAN)) {
+        dst.exponent = 2047;
+        if (type == VFP_QNAN)
+            dst.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
+        vfp_put_double(state, vfp_double_pack(&dst), dd);
+        return exceptions;
+    }
+
+    /* Zero keeps a zero exponent; anything else is re-biased 127 -> 1023. */
+    if (type & VFP_ZERO)
+        dst.exponent = 0;
+    else
+        dst.exponent = src.exponent + (1023 - 127);
+
+    return vfp_double_normaliseround(state, dd, &dst, fpscr, exceptions, "fcvtd");
+}
+
+/*
+ * FUITO: convert 'm', taken as an unsigned 32-bit integer, to single
+ * precision and store the result in register 'sd'.
+ */
+static u32 vfp_single_fuito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    struct vfp_single value;
+
+    /* The raw integer is the significand; normaliseround() does the rest. */
+    value.significand = (u32)m;
+    value.exponent = 127 + 31 - 1;
+    value.sign = 0;
+
+    return vfp_single_normaliseround(state, sd, &value, fpscr, 0, "fuito");
+}
+
+/*
+ * FSITO: convert 'm', taken as a signed 32-bit integer, to single precision
+ * and store the result in register 'sd'.
+ *
+ * Returns the exception flags from vfp_single_normaliseround().
+ */
+static u32 vfp_single_fsito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    struct vfp_single vs;
+
+    /* Move the integer's sign bit (bit 31) down to bit 15. */
+    vs.sign = (m & 0x80000000) >> 16;
+    vs.exponent = 127 + 31 - 1;
+
+    /*
+     * The significand is the magnitude of 'm'.  Negate in unsigned
+     * arithmetic: "-m" on a signed value overflows (undefined behaviour)
+     * when m is the most negative integer, whereas the unsigned form
+     * yields the intended 0x80000000.
+     */
+    vs.significand = vs.sign ? (0u - (u32)m) : (u32)m;
+
+    return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fsito");
+}
+
+/*
+ * FTOUI: convert the packed single 'm' to an unsigned 32-bit integer using
+ * the rounding mode in 'fpscr', and store the integer in register 'sd'.
+ *
+ * Returns the exception flags raised:
+ *   FPSCR_IDC - the input is denormal
+ *   FPSCR_IOC - the input is a NaN, negative with a non-zero result, or
+ *               too large; the result saturates to 0 or 0xffffffff
+ *   FPSCR_IXC - the result is inexact
+ */
+static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    struct vfp_single vsm;
+    u32 d, exceptions = 0;
+    int rmode = fpscr & FPSCR_RMODE_MASK;
+    int tm;
+
+    vfp_single_unpack(&vsm, m, &fpscr);
+    vfp_single_dump("VSM", &vsm);
+
+    /*
+     * Do we have a denormalised number?
+     */
+    tm = vfp_single_type(&vsm);
+    if (tm & VFP_DENORMAL)
+        exceptions |= FPSCR_IDC;
+
+    /* Treat NaNs as negative so that they saturate to 0 below. */
+    if (tm & VFP_NAN)
+        vsm.sign = 1;
+
+    if (vsm.exponent >= 127 + 32) {
+        /* Magnitude of 2^32 or more (also infinities and NaNs). */
+        d = vsm.sign ? 0 : 0xffffffff;
+        exceptions = FPSCR_IOC;
+    } else if (vsm.exponent >= 127) {
+        int shift = 127 + 31 - vsm.exponent;
+        u32 rem, incr = 0;
+
+        /*
+         * 2^0 <= m < 2^32-2^8
+         */
+        d = (vsm.significand << 1) >> shift;
+
+        /*
+         * 'rem' holds the bits shifted out of 'd'.  For shift == 0 or 1
+         * nothing is shifted out, and the former single-step shift by
+         * (33 - shift) would have used a count of 32 or 33, which is
+         * undefined and yields a bogus non-zero remainder on hosts that
+         * mask the shift count.
+         */
+        rem = shift > 0 ? (vsm.significand << 1) << (32 - shift) : 0;
+
+        /* Pick the increment that makes rem + incr carry when rounding up. */
+        if (rmode == FPSCR_ROUND_NEAREST) {
+            incr = 0x80000000;
+            if ((d & 1) == 0)
+                incr -= 1;
+        } else if (rmode == FPSCR_ROUND_TOZERO) {
+            incr = 0;
+        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
+            incr = ~0;
+        }
+
+        if ((rem + incr) < rem) {
+            if (d < 0xffffffff)
+                d += 1;
+            else
+                exceptions |= FPSCR_IOC;
+        }
+
+        /* A negative input cannot produce a non-zero unsigned result. */
+        if (d && vsm.sign) {
+            d = 0;
+            exceptions |= FPSCR_IOC;
+        } else if (rem)
+            exceptions |= FPSCR_IXC;
+    } else {
+        /*
+         * Magnitude below 1.0.
+         * NOTE(review): only the directed rounding modes adjust the result
+         * here; round-to-nearest yields 0 even for magnitudes in
+         * [0.5, 1.0) - confirm that this is intended.
+         */
+        d = 0;
+        if (vsm.exponent | vsm.significand) {
+            exceptions |= FPSCR_IXC;
+            if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
+                d = 1;
+            else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
+                d = 0;
+                exceptions |= FPSCR_IOC;
+            }
+        }
+    }
+
+    LOG_TRACE(Core_ARM11, "ftoui: d(s%d)=%08x exceptions=%08x", sd, d, exceptions);
+
+    vfp_put_float(state, d, sd);
+
+    return exceptions;
+}
+
+/*
+ * FTOUIZ: float to unsigned integer with round-towards-zero.  The incoming
+ * 'fpscr' is replaced wholesale by FPSCR_ROUND_TOZERO.
+ */
+static u32 vfp_single_ftouiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    const u32 forced_fpscr = FPSCR_ROUND_TOZERO;
+
+    return vfp_single_ftoui(state, sd, unused, m, forced_fpscr);
+}
+
+/*
+ * FTOSI: convert the packed single 'm' to a signed 32-bit integer using the
+ * rounding mode in 'fpscr', and store the integer in register 'sd'.
+ *
+ * Returns the exception flags raised:
+ *   FPSCR_IDC - the input is denormal
+ *   FPSCR_IOC - the input is a NaN (result 0) or out of range (result
+ *               saturates to 0x7fffffff or 0x80000000)
+ *   FPSCR_IXC - the result is inexact
+ */
+static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    struct vfp_single vsm;
+    u32 d, exceptions = 0;
+    int rmode = fpscr & FPSCR_RMODE_MASK;
+    int tm;
+
+    vfp_single_unpack(&vsm, m, &fpscr);
+    vfp_single_dump("VSM", &vsm);
+
+    /*
+     * Do we have a denormalised number?
+     */
+    tm = vfp_single_type(&vsm);
+    if (tm & VFP_DENORMAL)
+        exceptions |= FPSCR_IDC;
+
+    if (tm & VFP_NAN) {
+        d = 0;
+        exceptions |= FPSCR_IOC;
+    } else if (vsm.exponent >= 127 + 32) {
+        /*
+         * m >= 2^31-2^7: invalid
+         */
+        d = 0x7fffffff;
+        if (vsm.sign)
+            d = ~d;
+        exceptions |= FPSCR_IOC;
+    } else if (vsm.exponent >= 127) {
+        int shift = 127 + 31 - vsm.exponent;
+        u32 rem, incr = 0;
+
+        /* 2^0 <= m <= 2^31-2^7 */
+        d = (vsm.significand << 1) >> shift;
+
+        /*
+         * 'rem' holds the bits shifted out of 'd'.  For shift == 0 or 1
+         * nothing is shifted out, and the former single-step shift by
+         * (33 - shift) would have used a count of 32 or 33, which is
+         * undefined and yields a bogus non-zero remainder on hosts that
+         * mask the shift count.
+         */
+        rem = shift > 0 ? (vsm.significand << 1) << (32 - shift) : 0;
+
+        /* Pick the increment that makes rem + incr carry when rounding up. */
+        if (rmode == FPSCR_ROUND_NEAREST) {
+            incr = 0x80000000;
+            if ((d & 1) == 0)
+                incr -= 1;
+        } else if (rmode == FPSCR_ROUND_TOZERO) {
+            incr = 0;
+        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
+            incr = ~0;
+        }
+
+        if ((rem + incr) < rem && d < 0xffffffff)
+            d += 1;
+
+        /* Saturate the magnitude: 0x7fffffff if positive, 0x80000000 if negative. */
+        if (d > (0x7fffffffu + (vsm.sign != 0))) {
+            d = (0x7fffffffu + (vsm.sign != 0));
+            exceptions |= FPSCR_IOC;
+        } else if (rem)
+            exceptions |= FPSCR_IXC;
+
+        /* Two's-complement negate for negative inputs. */
+        if (vsm.sign)
+            d = (~d + 1);
+    } else {
+        /*
+         * Magnitude below 1.0.
+         * NOTE(review): only the directed rounding modes adjust the result
+         * here; round-to-nearest yields 0 even for magnitudes in
+         * [0.5, 1.0) - confirm that this is intended.
+         */
+        d = 0;
+        if (vsm.exponent | vsm.significand) {
+            exceptions |= FPSCR_IXC;
+            if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
+                d = 1;
+            else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
+                d = -1;
+        }
+    }
+
+    LOG_TRACE(Core_ARM11, "ftosi: d(s%d)=%08x exceptions=%08x", sd, d, exceptions);
+
+    vfp_put_float(state, (s32)d, sd);
+
+    return exceptions;
+}
+
+/*
+ * FTOSIZ: float to signed integer with round-towards-zero.  The incoming
+ * 'fpscr' is replaced wholesale by FPSCR_ROUND_TOZERO.
+ */
+static u32 vfp_single_ftosiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+    const u32 forced_fpscr = FPSCR_ROUND_TOZERO;
+
+    return vfp_single_ftosi(state, sd, unused, m, forced_fpscr);
+}
+
+/*
+ * Dispatch table for the extended single-precision operations, indexed by
+ * FEXT_TO_IDX(inst) in vfp_single_cpdo().  Entries with a null handler are
+ * rejected there as unknown operations.
+ *
+ * The array is sized explicitly so that every index the decoder can produce
+ * lands on a (possibly null, zero-initialised) entry instead of reading
+ * past the end of the initialiser list.
+ * NOTE(review): 32 assumes FEXT_TO_IDX() yields a 5-bit index - confirm
+ * against the macro's definition.
+ */
+static struct op fops_ext[32] = {
+    { vfp_single_fcpy,   0 },                 //0x00000000 - FEXT_FCPY
+    { vfp_single_fabs,   0 },                 //0x00000001 - FEXT_FABS
+    { vfp_single_fneg,   0 },                 //0x00000002 - FEXT_FNEG
+    { vfp_single_fsqrt,  0 },                 //0x00000003 - FEXT_FSQRT
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { vfp_single_fcmp,   OP_SCALAR },         //0x00000008 - FEXT_FCMP
+    { vfp_single_fcmpe,  OP_SCALAR },         //0x00000009 - FEXT_FCMPE
+    { vfp_single_fcmpz,  OP_SCALAR },         //0x0000000A - FEXT_FCMPZ
+    { vfp_single_fcmpez, OP_SCALAR },         //0x0000000B - FEXT_FCMPEZ
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { vfp_single_fcvtd,  OP_SCALAR|OP_DD },   //0x0000000F - FEXT_FCVT
+    { vfp_single_fuito,  OP_SCALAR },         //0x00000010 - FEXT_FUITO
+    { vfp_single_fsito,  OP_SCALAR },         //0x00000011 - FEXT_FSITO
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { nullptr, 0 },
+    { vfp_single_ftoui,  OP_SCALAR },         //0x00000018 - FEXT_FTOUI
+    { vfp_single_ftouiz, OP_SCALAR },         //0x00000019 - FEXT_FTOUIZ
+    { vfp_single_ftosi,  OP_SCALAR },         //0x0000001A - FEXT_FTOSI
+    { vfp_single_ftosiz, OP_SCALAR },         //0x0000001B - FEXT_FTOSIZ
+    /* 0x1C..0x1F: implicitly zero-initialised (no handler). */
+};
+
+
+
+
+
+/*
+ * Addition where 'vsn' is an infinity or a NaN.  The result is written to
+ * 'vsd'; the return value is the set of exception flags raised.
+ */
+static u32
+vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
+                          struct vfp_single *vsm, u32 fpscr)
+{
+    const int type_n = vfp_single_type(vsn);
+    const int type_m = vfp_single_type(vsm);
+
+    if (type_n & type_m & VFP_INFINITY) {
+        /* Both are infinities: opposite signs cancel and are invalid. */
+        if (vsn->sign ^ vsm->sign) {
+            *vsd = vfp_single_default_qnan;
+            return FPSCR_IOC;
+        }
+
+        /* Same sign: the infinity carries through. */
+        *vsd = *vsn;
+        return 0;
+    }
+
+    if ((type_n & VFP_INFINITY) && (type_m & VFP_NUMBER)) {
+        /* Infinity plus a finite number is that infinity. */
+        *vsd = *vsn;
+        return 0;
+    }
+
+    /* Everything else: 'n' is a NaN of some kind. */
+    return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
+}
+
+/*
+ * Add two unpacked singles: *vsd = *vsn + *vsm.
+ *
+ * The operands are only read through the pointers (which may be swapped
+ * locally); rounding and packing are left to the caller.  Returns the
+ * exception flags from the infinity/NaN path, or 0 for ordinary numbers.
+ */
+static u32
+vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
+               struct vfp_single *vsm, u32 fpscr)
+{
+    u32 exp_diff, m_sig;
+
+    /* Bit 31 of an operand significand is not expected to be set; warn only. */
+    if (vsn->significand & 0x80000000 ||
+            vsm->significand & 0x80000000) {
+        LOG_WARNING(Core_ARM11, "bad FP values");
+        vfp_single_dump("VSN", vsn);
+        vfp_single_dump("VSM", vsm);
+    }
+
+    /*
+     * Ensure that 'n' is the largest magnitude number.  Note that
+     * if 'n' and 'm' have equal exponents, we do not swap them.
+     * This ensures that NaN propagation works correctly.
+     */
+    if (vsn->exponent < vsm->exponent) {
+        std::swap(vsm, vsn);
+    }
+
+    /*
+     * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
+     * infinity or a NaN here.
+     */
+    if (vsn->exponent == 255)
+        return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
+
+    /*
+     * We have two proper numbers, where 'vsn' is the larger magnitude.
+     *
+     * Copy 'n' to 'd' before doing the arithmetic.
+     */
+    *vsd = *vsn;
+
+    /*
+     * Align both numbers: shift 'm' right by the exponent difference.
+     */
+    exp_diff = vsn->exponent - vsm->exponent;
+    m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
+
+    /*
+     * If the signs are different, we are really subtracting.
+     */
+    if (vsn->sign ^ vsm->sign) {
+        m_sig = vsn->significand - m_sig;
+        if ((s32)m_sig < 0) {
+            /* The difference went negative: flip the sign, keep the magnitude. */
+            vsd->sign = vfp_sign_negate(vsd->sign);
+            m_sig = (~m_sig + 1);
+        } else if (m_sig == 0) {
+            /* Exact cancellation: -0 only when rounding towards minus infinity. */
+            vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
+                        FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
+        }
+    } else {
+        m_sig = vsn->significand + m_sig;
+    }
+    vsd->significand = m_sig;
+
+    return 0;
+}
+
+/*
+ * Multiply two unpacked singles: *vsd = *vsn * *vsm.
+ *
+ * Rounding and packing are left to the caller.  Returns FPSCR_IOC for
+ * infinity * zero, the result of vfp_propagate_nan() when a NaN is
+ * involved, and 0 otherwise.
+ */
+static u32
+vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
+{
+    vfp_single_dump("VSN", vsn);
+    vfp_single_dump("VSM", vsm);
+
+    /*
+     * Order the operands so that 'larger' has the bigger exponent.  Equal
+     * exponents are left in place, which keeps NaN propagation correct.
+     */
+    struct vfp_single *larger = vsn;
+    struct vfp_single *smaller = vsm;
+    if (larger->exponent < smaller->exponent) {
+        larger = vsm;
+        smaller = vsn;
+        LOG_TRACE(Core_ARM11, "swapping M <-> N");
+    }
+
+    vsd->sign = larger->sign ^ smaller->sign;
+
+    const bool smaller_is_zero = (smaller->exponent | smaller->significand) == 0;
+
+    /* 'larger' is an infinity or a NaN; 'smaller' may be anything. */
+    if (larger->exponent == 255) {
+        const bool nan_involved = larger->significand ||
+                                  (smaller->exponent == 255 && smaller->significand);
+        if (nan_involved)
+            return vfp_propagate_nan(vsd, larger, smaller, fpscr);
+
+        /* infinity * 0 is invalid. */
+        if (smaller_is_zero) {
+            *vsd = vfp_single_default_qnan;
+            return FPSCR_IOC;
+        }
+
+        /* infinity * number is an infinity with the combined sign. */
+        vsd->exponent = larger->exponent;
+        vsd->significand = 0;
+        return 0;
+    }
+
+    /* A zero 'smaller' operand gives zero, whatever 'larger' is. */
+    if (smaller_is_zero) {
+        vsd->exponent = 0;
+        vsd->significand = 0;
+        return 0;
+    }
+
+    /*
+     * Sum the exponents, removing one bias; the extra 2 accounts for +1
+     * from each input operand.
+     */
+    vsd->exponent = larger->exponent + smaller->exponent - 127 + 2;
+    vsd->significand = vfp_hi64to32jamming((u64)larger->significand * smaller->significand);
+
+    vfp_single_dump("VSD", vsd);
+    return 0;
+}
+
+/*
+ * Bit flags for the 'negate' argument of vfp_single_multiply_accumulate():
+ * NEG_MULTIPLY flips the sign of the product, NEG_SUBTRACT flips the sign
+ * of the accumulator read from the destination register.
+ */
+#define NEG_MULTIPLY	(1 << 0)
+#define NEG_SUBTRACT	(1 << 1)
+
+/*
+ * Shared body of the multiply-accumulate family.
+ *
+ * Multiplies register 'sn' by the packed operand 'm', optionally negates
+ * the product (NEG_MULTIPLY), adds the value of register 'sd', optionally
+ * negated (NEG_SUBTRACT), and writes the rounded sum back to 'sd'.
+ * 'func' is the operation name handed to vfp_single_normaliseround().
+ * Returns the accumulated exception flags.
+ */
+static u32
+vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr, u32 negate, const char *func)
+{
+    vfp_single sum, product, operand, multiplier;
+    s32 packed;
+
+    /* First factor: register sn. */
+    packed = vfp_get_float(state, sn);
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, packed);
+    vfp_single_unpack(&operand, packed, &fpscr);
+    if (operand.exponent == 0 && operand.significand != 0)
+        vfp_single_normalise_denormal(&operand);
+
+    /* Second factor: the packed operand m. */
+    vfp_single_unpack(&multiplier, m, &fpscr);
+    if (multiplier.exponent == 0 && multiplier.significand != 0)
+        vfp_single_normalise_denormal(&multiplier);
+
+    u32 exceptions = vfp_single_multiply(&product, &operand, &multiplier, fpscr);
+
+    if (negate & NEG_MULTIPLY)
+        product.sign = vfp_sign_negate(product.sign);
+
+    /* Accumulator: register sd, unpacked into the reused 'operand'. */
+    packed = vfp_get_float(state, sd);
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sd, packed);
+    vfp_single_unpack(&operand, packed, &fpscr);
+    if (operand.exponent == 0 && operand.significand != 0)
+        vfp_single_normalise_denormal(&operand);
+
+    if (negate & NEG_SUBTRACT)
+        operand.sign = vfp_sign_negate(operand.sign);
+
+    exceptions |= vfp_single_add(&sum, &operand, &product, fpscr);
+
+    return vfp_single_normaliseround(state, sd, &sum, fpscr, exceptions, func);
+}
+
+/*
+ * Standard operations
+ */
+
+/*
+ * FMAC: sd = sd + (sn * sm)
+ */
+static u32 vfp_single_fmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    const u32 negate = 0;
+
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, sd);
+    return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, negate, "fmac");
+}
+
+/*
+ * FNMAC: sd = sd - (sn * sm)
+ */
+static u32 vfp_single_fnmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    const u32 negate = NEG_MULTIPLY;
+
+    // TODO: this one has its arguments inverted, investigate.
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sd, sn);
+    return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, negate, "fnmac");
+}
+
+/*
+ * FMSC: sd = -sd + (sn * sm)
+ */
+static u32 vfp_single_fmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    const u32 negate = NEG_SUBTRACT;
+
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, sd);
+    return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, negate, "fmsc");
+}
+
+/*
+ * FNMSC: sd = -sd - (sn * sm)
+ */
+static u32 vfp_single_fnmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    const u32 negate = NEG_SUBTRACT | NEG_MULTIPLY;
+
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, sd);
+    return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, negate, "fnmsc");
+}
+
+/*
+ * FMUL: sd = sn * sm
+ *
+ * 'm' is the already-fetched packed value of sm.  Returns the exception
+ * flags from the multiply and the final rounding.
+ */
+static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    struct vfp_single product, lhs, rhs;
+    const s32 packed_n = vfp_get_float(state, sn);
+
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, packed_n);
+
+    /* Unpack both factors, normalising denormals on the way. */
+    vfp_single_unpack(&lhs, packed_n, &fpscr);
+    if (lhs.exponent == 0 && lhs.significand != 0)
+        vfp_single_normalise_denormal(&lhs);
+
+    vfp_single_unpack(&rhs, m, &fpscr);
+    if (rhs.exponent == 0 && rhs.significand != 0)
+        vfp_single_normalise_denormal(&rhs);
+
+    const u32 exceptions = vfp_single_multiply(&product, &lhs, &rhs, fpscr);
+
+    return vfp_single_normaliseround(state, sd, &product, fpscr, exceptions, "fmul");
+}
+
+/*
+ * FNMUL: sd = -(sn * sm)
+ *
+ * 'm' is the already-fetched packed value of sm.  Returns the exception
+ * flags from the multiply and the final rounding.
+ */
+static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    struct vfp_single product, lhs, rhs;
+    const s32 packed_n = vfp_get_float(state, sn);
+
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, packed_n);
+
+    /* Unpack both factors, normalising denormals on the way. */
+    vfp_single_unpack(&lhs, packed_n, &fpscr);
+    if (lhs.exponent == 0 && lhs.significand != 0)
+        vfp_single_normalise_denormal(&lhs);
+
+    vfp_single_unpack(&rhs, m, &fpscr);
+    if (rhs.exponent == 0 && rhs.significand != 0)
+        vfp_single_normalise_denormal(&rhs);
+
+    const u32 exceptions = vfp_single_multiply(&product, &lhs, &rhs, fpscr);
+
+    /* Negate the product before it is rounded and stored. */
+    product.sign = vfp_sign_negate(product.sign);
+
+    return vfp_single_normaliseround(state, sd, &product, fpscr, exceptions, "fnmul");
+}
+
+/*
+ * FADD: sd = sn + sm
+ *
+ * 'm' is the already-fetched packed value of sm.  Returns the exception
+ * flags from the addition and the final rounding.
+ */
+static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    struct vfp_single sum, lhs, rhs;
+    const s32 packed_n = vfp_get_float(state, sn);
+
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, packed_n);
+
+    /* Unpack both addends, normalising denormals on the way. */
+    vfp_single_unpack(&lhs, packed_n, &fpscr);
+    if (lhs.exponent == 0 && lhs.significand != 0)
+        vfp_single_normalise_denormal(&lhs);
+
+    vfp_single_unpack(&rhs, m, &fpscr);
+    if (rhs.exponent == 0 && rhs.significand != 0)
+        vfp_single_normalise_denormal(&rhs);
+
+    const u32 exceptions = vfp_single_add(&sum, &lhs, &rhs, fpscr);
+
+    return vfp_single_normaliseround(state, sd, &sum, fpscr, exceptions, "fadd");
+}
+
+/*
+ * FSUB: sd = sn - sm
+ *
+ * Implemented as an addition with the sign of 'm' inverted.
+ */
+static u32 vfp_single_fsub(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    s32 subtrahend = m;
+
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, sd);
+
+    /* The bit pattern 0x7FC00000 (a NaN) is passed through un-negated. */
+    if (subtrahend != 0x7FC00000)
+        subtrahend = vfp_single_packed_negate(subtrahend);
+
+    return vfp_single_fadd(state, sd, sn, subtrahend, fpscr);
+}
+
+/*
+ * FDIV: sd = sn / sm
+ *
+ * 'm' is the already-fetched packed value of sm.  Special cases are
+ * dispatched to the labels at the end of the function.  Returns the
+ * exception flags raised: FPSCR_IOC for inf/inf and 0/0, FPSCR_DZC for a
+ * division of a non-zero value by zero, the result of vfp_propagate_nan()
+ * for NaN operands, otherwise the result of vfp_single_normaliseround().
+ */
+static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    struct vfp_single vsd, vsn, vsm;
+    u32 exceptions = 0;
+    s32 n = vfp_get_float(state, sn);
+    int tm, tn;
+
+    LOG_TRACE(Core_ARM11, "s%u = %08x", sn, n);
+
+    vfp_single_unpack(&vsn, n, &fpscr);
+    vfp_single_unpack(&vsm, m, &fpscr);
+
+    /* The result sign is fixed up front and shared by every path except NaN/invalid. */
+    vsd.sign = vsn.sign ^ vsm.sign;
+
+    tn = vfp_single_type(&vsn);
+    tm = vfp_single_type(&vsm);
+
+    /*
+     * Is n a NAN?
+     */
+    if (tn & VFP_NAN)
+        goto vsn_nan;
+
+    /*
+     * Is m a NAN?
+     */
+    if (tm & VFP_NAN)
+        goto vsm_nan;
+
+    /*
+     * If n and m are infinity, the result is invalid
+     * If n and m are zero, the result is invalid
+     */
+    if (tm & tn & (VFP_INFINITY|VFP_ZERO))
+        goto invalid;
+
+    /*
+     * If n is infinity, the result is infinity
+     */
+    if (tn & VFP_INFINITY)
+        goto infinity;
+
+    /*
+     * If m is zero, raise div0 exception
+     */
+    if (tm & VFP_ZERO)
+        goto divzero;
+
+    /*
+     * If m is infinity, or n is zero, the result is zero
+     */
+    if (tm & VFP_INFINITY || tn & VFP_ZERO)
+        goto zero;
+
+    if (tn & VFP_DENORMAL)
+        vfp_single_normalise_denormal(&vsn);
+    if (tm & VFP_DENORMAL)
+        vfp_single_normalise_denormal(&vsm);
+
+    /*
+     * Ok, we have two numbers, we can perform division.
+     */
+    vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
+    vsm.significand <<= 1;
+    /* Pre-scale the dividend (and bump the exponent) when the divisor is not the larger. */
+    if (vsm.significand <= (2 * vsn.significand)) {
+        vsn.significand >>= 1;
+        vsd.exponent++;
+    }
+    {
+        /* do_div() presumably divides in place; the quotient is read back from 'significand'. */
+        u64 significand = (u64)vsn.significand << 32;
+        do_div(significand, vsm.significand);
+        vsd.significand = (u32)significand;
+    }
+    /* If the low bits are all clear, set bit 0 when the division left a remainder. */
+    if ((vsd.significand & 0x3f) == 0)
+        vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
+
+    return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fdiv");
+
+    /* n is a NaN: propagate with n as the first operand. */
+vsn_nan:
+    exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
+    /* Common exit for the special cases: pack vsd and store it. */
+pack:
+    vfp_put_float(state, vfp_single_pack(&vsd), sd);
+    return exceptions;
+
+    /* Only m is a NaN: propagate with m as the first operand. */
+vsm_nan:
+    exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
+    goto pack;
+
+zero:
+    vsd.exponent = 0;
+    vsd.significand = 0;
+    goto pack;
+
+    /* Division by zero yields an infinity and additionally raises DZC. */
+divzero:
+    exceptions = FPSCR_DZC;
+infinity:
+    vsd.exponent = 255;
+    vsd.significand = 0;
+    goto pack;
+
+invalid:
+    vfp_put_float(state, vfp_single_pack(&vfp_single_default_qnan), sd);
+    return FPSCR_IOC;
+}
+
+/*
+ * Dispatch table for the standard single-precision operations, indexed by
+ * FOP_TO_IDX(op) in vfp_single_cpdo().
+ *
+ * The array is sized explicitly so that every index the decoder can produce
+ * lands on a (possibly null, zero-initialised) entry instead of reading
+ * past the end of the initialiser list; null handlers are rejected by
+ * vfp_single_cpdo().
+ * NOTE(review): 16 assumes FOP_TO_IDX() yields a 4-bit index - confirm
+ * against the macro's definition.
+ */
+static struct op fops[16] = {
+	{ vfp_single_fmac,  0 },
+	{ vfp_single_fmsc,  0 },
+	{ vfp_single_fmul,  0 },
+	{ vfp_single_fadd,  0 },
+	{ vfp_single_fnmac, 0 },
+	{ vfp_single_fnmsc, 0 },
+	{ vfp_single_fnmul, 0 },
+	{ vfp_single_fsub,  0 },
+	{ vfp_single_fdiv,  0 },
+	/* remaining entries are implicitly zero-initialised (no handler) */
+};
+
+/*
+ * Split a single-precision register number into its bank (bits 3-4, left
+ * in place) and its index within the bank (bits 0-2).
+ */
+#define FREG_BANK(x)	((x) & 0x18)
+#define FREG_IDX(x)	((x) & 7)
+
+/*
+ * Decode and execute one single-precision VFP data-processing instruction.
+ *
+ * The handler is looked up in fops_ext (when the opcode is FOP_EXT) or in
+ * fops, then invoked once per vector element.  Scalar operations, and
+ * operations whose destination lies in register bank 0, run exactly once.
+ *
+ * Returns the OR of the exception flags reported by every iteration.  An
+ * instruction with no handler is logged as critical and terminates the
+ * process.
+ */
+u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
+{
+    u32 op = inst & FOP_MASK;
+    u32 exceptions = 0;
+    unsigned int dest;
+    unsigned int sn = vfp_get_sn(inst);
+    unsigned int sm = vfp_get_sm(inst);
+    unsigned int vecitr, veclen, vecstride;
+    unsigned int op_idx;
+    struct op *fop = nullptr;
+
+    vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
+
+    /*
+     * Look the handler up with a bounds check: an undefined encoding must
+     * not index past the end of either table.
+     */
+    if (op == FOP_EXT) {
+        op_idx = FEXT_TO_IDX(inst);
+        if (op_idx < sizeof(fops_ext) / sizeof(fops_ext[0]))
+            fop = &fops_ext[op_idx];
+    } else {
+        op_idx = FOP_TO_IDX(op);
+        if (op_idx < sizeof(fops) / sizeof(fops[0]))
+            fop = &fops[op_idx];
+    }
+
+    if (!fop || !fop->fn) {
+        /* Report the index that was actually used for the lookup. */
+        LOG_CRITICAL(Core_ARM11, "could not find single op %d, inst=0x%x@0x%x", op_idx, inst, state->Reg[15]);
+        exit(-1);
+        return (u32)-1;
+    }
+
+    /*
+     * fcvtsd takes a dN register number as destination, not sN.
+     * Technically, if bit 0 of dd is set, this is an invalid
+     * instruction.  However, we ignore this for efficiency.
+     * It also only operates on scalars.
+     */
+    if (fop->flags & OP_DD)
+        dest = vfp_get_dd(inst);
+    else
+        dest = vfp_get_sd(inst);
+
+    /*
+     * If destination bank is zero, vector length is always '1'.
+     * ARM DDI0100F C5.1.3, C5.3.2.
+     */
+    if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
+        veclen = 0;
+    else
+        veclen = fpscr & FPSCR_LENGTH_MASK;
+
+    LOG_TRACE(Core_ARM11, "vecstride=%u veclen=%u", vecstride,
+              (veclen >> FPSCR_LENGTH_BIT) + 1);
+
+    /* 'vecitr' and 'veclen' stay in FPSCR LEN-field units, hence the step. */
+    for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
+        s32 m = vfp_get_float(state, sm);
+        u32 except;
+        char type;
+
+        type = (fop->flags & OP_DD) ? 'd' : 's';
+        (void)type;
+
+        if (op == FOP_EXT)
+            LOG_TRACE(Core_ARM11, "itr%d (%c%u) = op[%u] (s%u=%08x)",
+                      vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
+                      sm, m);
+        else
+            LOG_TRACE(Core_ARM11, "itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)",
+                      vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
+                      FOP_TO_IDX(op), sm, m);
+
+        except = fop->fn(state, dest, sn, m, fpscr);
+        LOG_TRACE(Core_ARM11, "itr%d: exceptions=%08x",
+                  vecitr >> FPSCR_LENGTH_BIT, except);
+
+        exceptions |= except;
+
+        /*
+         * CHECK: It appears to be undefined whether we stop when
+         * we encounter an exception.  We continue.
+         *
+         * Advance the registers by the stride, wrapping within their
+         * bank.  An 'sm' in bank 0 is not advanced.
+         */
+        dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
+        sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
+        if (FREG_BANK(sm) != 0)
+            sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
+    }
+    return exceptions;
+}