2016-07-01 15:01:06 +02:00
|
|
|
/* This file is part of the dynarmic project.
|
|
|
|
* Copyright (c) 2016 MerryMage
|
|
|
|
* This software may be used and distributed according to the terms of the GNU
|
|
|
|
* General Public License version 2 or any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <unordered_map>
|
2016-07-23 00:55:00 +02:00
|
|
|
#include <common/bit_util.h>
|
2016-07-01 15:01:06 +02:00
|
|
|
|
2016-09-03 22:48:03 +02:00
|
|
|
#include <xbyak.h>
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
#include "backend_x64/abi.h"
|
2016-07-01 15:01:06 +02:00
|
|
|
#include "backend_x64/emit_x64.h"
|
2016-08-24 21:07:08 +02:00
|
|
|
#include "backend_x64/jitstate.h"
|
2016-09-06 01:52:33 +02:00
|
|
|
#include "frontend/arm/types.h"
|
2016-09-03 22:48:03 +02:00
|
|
|
#include "frontend/ir/basic_block.h"
|
2016-09-05 12:54:09 +02:00
|
|
|
#include "frontend/ir/location_descriptor.h"
|
2016-09-03 22:48:03 +02:00
|
|
|
#include "frontend/ir/microinstruction.h"
|
2016-07-01 15:01:06 +02:00
|
|
|
|
|
|
|
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
|
|
|
|
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
|
|
|
|
|
|
|
|
namespace Dynarmic {
|
|
|
|
namespace BackendX64 {
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Builds the 32-bit memory operand for guest register `reg`.
// The operand is addressed relative to r15: the offset of JitState::Reg plus
// sizeof(u32) times the numeric value of `reg`.
static Xbyak::Address MJitStateReg(Arm::Reg reg) {
    using namespace Xbyak::util;

    const size_t reg_index = static_cast<size_t>(reg);
    const size_t byte_offset = offsetof(JitState, Reg) + sizeof(u32) * reg_index;
    return dword[r15 + byte_offset];
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Builds the memory operand for extended register `reg`, addressed relative to r15.
//   S0..S31 -> dword operand, indexed in sizeof(u32) steps from JitState::ExtReg.
//   D0..D31 -> qword operand, indexed in sizeof(u64) steps from JitState::ExtReg.
// Both views index into the same JitState::ExtReg storage.
// Any other value trips the assertion.
static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) {
    using namespace Xbyak::util;

    const bool is_single = reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31;
    if (is_single) {
        const size_t slot = static_cast<size_t>(reg) - static_cast<size_t>(Arm::ExtReg::S0);
        return dword[r15 + offsetof(JitState, ExtReg) + sizeof(u32) * slot];
    }

    const bool is_double = reg >= Arm::ExtReg::D0 && reg <= Arm::ExtReg::D31;
    if (is_double) {
        const size_t slot = static_cast<size_t>(reg) - static_cast<size_t>(Arm::ExtReg::D0);
        return qword[r15 + offsetof(JitState, ExtReg) + sizeof(u64) * slot];
    }

    ASSERT_MSG(false, "Should never happen.");
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Builds the 32-bit memory operand for the guest CPSR, located at
// offsetof(JitState, Cpsr) relative to r15.
static Xbyak::Address MJitStateCpsr() {
    using namespace Xbyak::util;

    const size_t cpsr_offset = offsetof(JitState, Cpsr);
    return dword[r15 + cpsr_offset];
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Removes `inst` from the instruction list of `block`.
static void EraseInstruction(IR::Block& block, IR::Inst* inst) {
    auto&& instruction_list = block.Instructions();
    instruction_list.erase(inst);
}
|
|
|
|
|
2016-08-26 20:14:25 +02:00
|
|
|
// Emits host code for the IR block `block` and returns its block descriptor.
//
// Steps, in order:
//   1. Reset the register allocator and align the code buffer.
//   2. Record the entry point in basic_blocks and unique_hash_to_code_ptr.
//   3. Emit the condition prelude, then one Emit* call per IR instruction
//      (dispatched on the opcode via opcodes.inc).
//   4. Emit the cycle-count update and the block terminal, followed by an int3.
//   5. Patch references to this block and record the emitted size.
EmitX64::BlockDescriptor EmitX64::Emit(IR::Block& block) {
    const IR::LocationDescriptor location = block.Location();

    reg_alloc.Reset();

    code->align();
    const CodePtr entry_point = code->getCurr();
    basic_blocks[location].code_ptr = entry_point;
    unique_hash_to_code_ptr[location.UniqueHash()] = entry_point;

    EmitCondPrelude(block);

    for (auto it = block.begin(); it != block.end(); ++it) {
        IR::Inst* inst = &*it;

        // Dispatch to the Emit* member function matching the opcode.
        switch (inst->GetOpcode()) {

#define OPCODE(name, type, ...)                   \
        case IR::Opcode::name:                    \
            EmitX64::Emit##name(block, inst);     \
            break;
#include "frontend/ir/opcodes.inc"
#undef OPCODE

        default:
            ASSERT_MSG(false, "Invalid opcode %zu", static_cast<size_t>(inst->GetOpcode()));
            break;
        }

        reg_alloc.EndOfAllocScope();
    }

    EmitAddCycles(block.CycleCount());
    EmitTerminal(block.GetTerminal(), block.Location());
    code->int3();

    reg_alloc.AssertNoMoreUses();

    Patch(location, entry_point);

    // Size is measured from the entry point to the current emission position.
    const std::intptr_t block_end = std::intptr_t(code->getCurr());
    const std::intptr_t block_begin = std::intptr_t(entry_point);
    basic_blocks[location].size = block_end - block_begin;

    return basic_blocks[location];
}
|
|
|
|
|
2016-08-05 15:07:27 +02:00
|
|
|
// Breakpoint: emits a single host int3 instruction.
// Neither the block nor the instruction is inspected.
void EmitX64::EmitBreakpoint(IR::Block&, IR::Inst*) {
    code->int3();
}
|
|
|
|
|
2016-08-02 12:51:05 +02:00
|
|
|
// Identity: emits no host code.
// For a non-immediate argument, `inst` is registered with the allocator as an
// additional definition of that argument; an immediate argument needs nothing.
void EmitX64::EmitIdentity(IR::Block& block, IR::Inst* inst) {
    IR::Value source = inst->GetArg(0);
    if (source.IsImmediate()) {
        return;
    }

    reg_alloc.RegisterAddDef(inst, source);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// GetRegister: loads the guest register named by argument 0 from the JIT state
// into a newly defined 32-bit host register.
void EmitX64::EmitGetRegister(IR::Block&, IR::Inst* inst) {
    const Arm::Reg guest_reg = inst->GetArg(0).GetRegRef();

    const Xbyak::Reg32 dest = reg_alloc.DefGpr(inst).cvt32();
    code->mov(dest, MJitStateReg(guest_reg));
}
|
|
|
|
|
2016-08-05 19:54:19 +02:00
|
|
|
// GetExtendedRegister32: loads a single-precision extended register (S0..S31,
// asserted) from the JIT state into a newly defined XMM register using movss.
void EmitX64::EmitGetExtendedRegister32(IR::Block& block, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(ext_reg >= Arm::ExtReg::S0 && ext_reg <= Arm::ExtReg::S31);

    const Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    code->movss(dest, MJitStateExtReg(ext_reg));
}
|
|
|
|
|
|
|
|
// GetExtendedRegister64: loads a double-precision extended register (D0..D31,
// asserted) from the JIT state into a newly defined XMM register using movsd.
void EmitX64::EmitGetExtendedRegister64(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(ext_reg >= Arm::ExtReg::D0 && ext_reg <= Arm::ExtReg::D31);

    const Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    code->movsd(dest, MJitStateExtReg(ext_reg));
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// SetRegister: stores argument 1 into the guest register named by argument 0.
// An immediate is written straight to memory; otherwise the value is stored
// from the 32-bit view of the host register holding it.
void EmitX64::EmitSetRegister(IR::Block&, IR::Inst* inst) {
    const Arm::Reg guest_reg = inst->GetArg(0).GetRegRef();
    IR::Value value = inst->GetArg(1);

    if (!value.IsImmediate()) {
        const Xbyak::Reg32 source = reg_alloc.UseGpr(value).cvt32();
        code->mov(MJitStateReg(guest_reg), source);
        return;
    }

    code->mov(MJitStateReg(guest_reg), value.GetU32());
}
|
|
|
|
|
2016-08-05 19:54:19 +02:00
|
|
|
// SetExtendedRegister32: stores argument 1 (held in an XMM register) into a
// single-precision extended register (S0..S31, asserted) using movss.
void EmitX64::EmitSetExtendedRegister32(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(ext_reg >= Arm::ExtReg::S0 && ext_reg <= Arm::ExtReg::S31);

    const Xbyak::Xmm value = reg_alloc.UseXmm(inst->GetArg(1));
    code->movss(MJitStateExtReg(ext_reg), value);
}
|
|
|
|
|
|
|
|
// SetExtendedRegister64: stores argument 1 (held in an XMM register) into a
// double-precision extended register (D0..D31, asserted) using movsd.
void EmitX64::EmitSetExtendedRegister64(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(ext_reg >= Arm::ExtReg::D0 && ext_reg <= Arm::ExtReg::D31);

    const Xbyak::Xmm value = reg_alloc.UseXmm(inst->GetArg(1));
    code->movsd(MJitStateExtReg(ext_reg), value);
}
|
|
|
|
|
|
|
|
// GetCpsr: loads the whole 32-bit guest CPSR into a newly defined host register.
void EmitX64::EmitGetCpsr(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 dest = reg_alloc.DefGpr(inst).cvt32();
    const Xbyak::Address cpsr = MJitStateCpsr();

    code->mov(dest, cpsr);
}
|
|
|
|
|
|
|
|
// SetCpsr: overwrites the whole 32-bit guest CPSR with argument 0.
void EmitX64::EmitSetCpsr(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 new_cpsr = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();
    const Xbyak::Address cpsr = MJitStateCpsr();

    code->mov(cpsr, new_cpsr);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// GetNFlag: extracts CPSR bit 31 into a newly defined host register.
// No masking is needed: a logical right shift by 31 of a 32-bit value leaves
// only the top bit.
void EmitX64::EmitGetNFlag(IR::Block&, IR::Inst* inst) {
    constexpr int n_bit = 31;

    const Xbyak::Reg32 flag = reg_alloc.DefGpr(inst).cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, n_bit);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// SetNFlag: writes argument 0 into CPSR bit 31.
//   Immediate argument: a single OR (set) or AND (clear) on the CPSR in memory.
//   Register argument:  the value is shifted into position in a scratch copy,
//                       the old bit is cleared in memory, then the new bit is
//                       OR-ed in.
void EmitX64::EmitSetNFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 31;
    constexpr u32 flag_mask = 1u << flag_bit;

    IR::Value new_flag = inst->GetArg(0);

    if (!new_flag.IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(new_flag).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (new_flag.GetU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    } else {
        code->and_(MJitStateCpsr(), ~flag_mask);
    }
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// GetZFlag: extracts CPSR bit 30 into a newly defined host register
// (shift right by 30, then mask to one bit).
void EmitX64::EmitGetZFlag(IR::Block&, IR::Inst* inst) {
    constexpr int z_bit = 30;

    const Xbyak::Reg32 flag = reg_alloc.DefGpr(inst).cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, z_bit);
    code->and_(flag, 1);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// SetZFlag: writes argument 0 into CPSR bit 30.
//   Immediate argument: a single OR (set) or AND (clear) on the CPSR in memory.
//   Register argument:  the value is shifted into position in a scratch copy,
//                       the old bit is cleared in memory, then the new bit is
//                       OR-ed in.
void EmitX64::EmitSetZFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 30;
    constexpr u32 flag_mask = 1u << flag_bit;

    IR::Value new_flag = inst->GetArg(0);

    if (!new_flag.IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(new_flag).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (new_flag.GetU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    } else {
        code->and_(MJitStateCpsr(), ~flag_mask);
    }
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// GetCFlag: extracts CPSR bit 29 into a newly defined host register
// (shift right by 29, then mask to one bit).
void EmitX64::EmitGetCFlag(IR::Block&, IR::Inst* inst) {
    constexpr int c_bit = 29;

    const Xbyak::Reg32 flag = reg_alloc.DefGpr(inst).cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, c_bit);
    code->and_(flag, 1);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// SetCFlag: writes argument 0 into CPSR bit 29.
//   Immediate argument: a single OR (set) or AND (clear) on the CPSR in memory.
//   Register argument:  the value is shifted into position in a scratch copy,
//                       the old bit is cleared in memory, then the new bit is
//                       OR-ed in.
void EmitX64::EmitSetCFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 29;
    constexpr u32 flag_mask = 1u << flag_bit;

    IR::Value new_flag = inst->GetArg(0);

    if (!new_flag.IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(new_flag).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (new_flag.GetU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    } else {
        code->and_(MJitStateCpsr(), ~flag_mask);
    }
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// GetVFlag: extracts CPSR bit 28 into a newly defined host register
// (shift right by 28, then mask to one bit).
void EmitX64::EmitGetVFlag(IR::Block&, IR::Inst* inst) {
    constexpr int v_bit = 28;

    const Xbyak::Reg32 flag = reg_alloc.DefGpr(inst).cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, v_bit);
    code->and_(flag, 1);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// SetVFlag: writes argument 0 into CPSR bit 28.
//   Immediate argument: a single OR (set) or AND (clear) on the CPSR in memory.
//   Register argument:  the value is shifted into position in a scratch copy,
//                       the old bit is cleared in memory, then the new bit is
//                       OR-ed in.
void EmitX64::EmitSetVFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 28;
    constexpr u32 flag_mask = 1u << flag_bit;

    IR::Value new_flag = inst->GetArg(0);

    if (!new_flag.IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(new_flag).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (new_flag.GetU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    } else {
        code->and_(MJitStateCpsr(), ~flag_mask);
    }
}
|
|
|
|
|
2016-08-06 23:04:52 +02:00
|
|
|
// OrQFlag: ORs argument 0 into CPSR bit 27. The bit is only ever set here,
// never cleared.
//   Immediate argument: emits an OR when the immediate is true, nothing otherwise.
//   Register argument:  the value is shifted into position in a scratch copy
//                       and OR-ed into the CPSR in memory.
void EmitX64::EmitOrQFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 27;
    constexpr u32 flag_mask = 1u << flag_bit;

    IR::Value new_flag = inst->GetArg(0);

    if (!new_flag.IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(new_flag).cvt32();

        code->shl(shifted, flag_bit);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (new_flag.GetU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    }
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// BXWritePC: writes argument 0 to the guest PC and updates CPSR.T (bit 5)
// from bit 0 of the target address.
//
// Pseudocode:
// if (new_pc & 1) {
//    new_pc &= 0xFFFFFFFE;
//    cpsr.T = true;
// } else {
//    new_pc &= 0xFFFFFFFC;
//    cpsr.T = false;
// }
//
// An immediate target is resolved at emit time. A register target is handled
// branch-free with cmove and a computed alignment mask.
void EmitX64::EmitBXWritePC(IR::Block&, IR::Inst* inst) {
    const u32 T_bit = 1 << 5;
    auto arg = inst->GetArg(0);

    if (arg.IsImmediate()) {
        u32 new_pc = arg.GetU32();
        if (Common::Bit<0>(new_pc)) {
            new_pc &= 0xFFFFFFFE;
            code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
            code->or_(MJitStateCpsr(), T_bit);
        } else {
            new_pc &= 0xFFFFFFFC;
            code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
            code->and_(MJitStateCpsr(), ~T_bit);
        }
    } else {
        using Xbyak::util::ptr;

        // The CPSR and PC slots are 32-bit (dword) operands, so every register
        // that is loaded from or stored to them must be a 32-bit view. Using
        // the 64-bit registers here would make the moves 8 bytes wide and
        // touch the memory that follows each slot.
        Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
        Xbyak::Reg32 tmp1 = reg_alloc.ScratchGpr().cvt32();
        Xbyak::Reg32 tmp2 = reg_alloc.ScratchGpr().cvt32();

        code->mov(tmp1, MJitStateCpsr());
        code->mov(tmp2, tmp1);
        code->and_(tmp2, u32(~T_bit));      // CPSR.T = 0
        code->or_(tmp1, u32(T_bit));        // CPSR.T = 1
        code->test(new_pc, u32(1));
        code->cmove(tmp1, tmp2);            // CPSR.T = pc & 1
        code->mov(MJitStateCpsr(), tmp1);
        // tmp2 = new_pc * 2, which moves bit 0 of the target into bit 1 of the mask.
        code->lea(tmp2, ptr[new_pc + new_pc * 1]);
        code->or_(tmp2, u32(0xFFFFFFFC));   // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
        code->and_(new_pc, tmp2);
        code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
    }
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// CallSupervisor: calls the cb.CallSVC callback with argument 0 as its host-call
// argument. The call is bracketed by SwitchMxcsrOnExit / SwitchMxcsrOnEntry.
void EmitX64::EmitCallSupervisor(IR::Block&, IR::Inst* inst) {
    auto svc_argument = inst->GetArg(0);

    // No result is produced, hence the null result instruction.
    reg_alloc.HostCall(nullptr, svc_argument);

    code->SwitchMxcsrOnExit();
    code->CallFunction(cb.CallSVC);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
2016-08-26 23:47:54 +02:00
|
|
|
// Host-side helper called from emitted code: returns jit_state->Fpscr().
static u32 GetFpscrImpl(JitState* jit_state) {
    const u32 fpscr = jit_state->Fpscr();
    return fpscr;
}
|
|
|
|
|
|
|
|
// GetFpscr: emits a host call to GetFpscrImpl, passing r15 as the JitState
// pointer in the first ABI parameter register. `inst` is handed to HostCall as
// the instruction that receives the call's result.
void EmitX64::EmitGetFpscr(IR::Block&, IR::Inst* inst) {
    reg_alloc.HostCall(inst);

    // First parameter: the JitState pointer.
    code->mov(code->ABI_PARAM1, code->r15);

    code->SwitchMxcsrOnExit();
    code->CallFunction(&GetFpscrImpl);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
|
|
|
// Host-side helper called from emitted code: forwards `value` to
// jit_state->SetFpscr().
static void SetFpscrImpl(u32 value, JitState* jit_state) {
    jit_state->SetFpscr(value);
}
|
|
|
|
|
|
|
|
// SetFpscr: emits a host call to SetFpscrImpl(value, jit_state).
// Argument 0 is supplied through HostCall; r15 is passed as the JitState
// pointer in the second ABI parameter register.
void EmitX64::EmitSetFpscr(IR::Block&, IR::Inst* inst) {
    auto new_fpscr = inst->GetArg(0);

    // No result is produced, hence the null result instruction.
    reg_alloc.HostCall(nullptr, new_fpscr);

    // Second parameter: the JitState pointer.
    code->mov(code->ABI_PARAM2, code->r15);

    code->SwitchMxcsrOnExit();
    code->CallFunction(&SetFpscrImpl);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
|
|
|
// GetFpscrNZCV: loads the 32-bit JitState::FPSCR_nzcv field into a newly
// defined host register.
void EmitX64::EmitGetFpscrNZCV(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    const Xbyak::Reg32 dest = reg_alloc.DefGpr(inst).cvt32();
    const Xbyak::Address nzcv_field = dword[r15 + offsetof(JitState, FPSCR_nzcv)];

    code->mov(dest, nzcv_field);
}
|
|
|
|
|
|
|
|
// SetFpscrNZCV: stores argument 0 into the 32-bit JitState::FPSCR_nzcv field.
void EmitX64::EmitSetFpscrNZCV(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    const Xbyak::Reg32 new_nzcv = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();
    const Xbyak::Address nzcv_field = dword[r15 + offsetof(JitState, FPSCR_nzcv)];

    code->mov(nzcv_field, new_nzcv);
}
|
|
|
|
|
2016-08-13 01:10:23 +02:00
|
|
|
// PushRSB: pushes an entry (location-descriptor hash, host code pointer) onto
// the return stack buffer kept in the JIT state.
//
// Argument 0 must be an immediate 64-bit unique hash. The code pointer comes
// from unique_hash_to_code_ptr when the target block is already known;
// otherwise it is the return-from-run-code address. The emitted instruction
// that loads the pointer is recorded in patch_unique_hash_locations so that
// EmitX64::Patch can rewrite it later.
//
// Emitted code:
//   index = (rsb_ptr + 1) & (RSBSize - 1)
//   if any rsb_location_descriptors[i] == hash: skip (already present)
//   rsb_ptr = index
//   rsb_location_descriptors[index] = hash
//   rsb_codeptrs[index] = code pointer
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    ASSERT(inst->GetArg(0).IsImmediate());
    u64 imm64 = inst->GetArg(0).GetU64();

    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
    Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
    Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();

    // Single lookup: reuse the iterator instead of find() followed by operator[].
    const auto known_block = unique_hash_to_code_ptr.find(imm64);
    const u64 code_ptr = known_block != unique_hash_to_code_ptr.end()
                       ? u64(known_block->second)
                       : u64(code->GetReturnFromRunCodeAddress());

    code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
    code->add(index_reg, 1);
    code->and_(index_reg, u32(JitState::RSBSize - 1));

    code->mov(loc_desc_reg, u64(imm64));
    CodePtr patch_location = code->getCurr<CodePtr>();
    patch_unique_hash_locations[imm64].emplace_back(patch_location);
    code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
    code->EnsurePatchLocationSize(patch_location, 10);

    // Skip the push when the descriptor is already present in any slot.
    Xbyak::Label label;
    for (size_t i = 0; i < JitState::RSBSize; ++i) {
        code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
        code->je(label, code->T_SHORT);
    }

    code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
    code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
    code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
    code->L(label);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// GetCarryFromOp is never emitted through this entry point: the emitters in
// this file that produce a carry erase the pseudo-operation from the block
// themselves. Being dispatched here therefore trips the assertion.
void EmitX64::EmitGetCarryFromOp(IR::Block&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// GetOverflowFromOp is not expected to be dispatched directly; reaching this
// emitter trips the assertion.
void EmitX64::EmitGetOverflowFromOp(IR::Block&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}
|
|
|
|
|
2016-08-04 23:04:42 +02:00
|
|
|
// Pack2x32To1x64: builds a 64-bit value from two 32-bit halves.
// Argument 0 becomes the low word, argument 1 the high word:
//   packed = zero_extend(arg0) | (arg1 << 32)
void EmitX64::EmitPack2x32To1x64(IR::Block&, IR::Inst* inst) {
    OpArg low_word;
    Xbyak::Reg64 packed;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(low_word, packed) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        packed = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        low_word = packed.cvt32();
    }
    low_word.setBit(32);

    Xbyak::Reg64 high_word = reg_alloc.UseScratchGpr(inst->GetArg(1));

    code->shl(high_word, 32);
    code->mov(packed.cvt32(), *low_word); // Zero extend to 64-bits
    code->or_(packed, high_word);
}
|
|
|
|
|
|
|
|
// LeastSignificantWord: emits no host code. `inst` is registered with the
// allocator as an additional definition of argument 0.
void EmitX64::EmitLeastSignificantWord(IR::Block&, IR::Inst* inst) {
    auto source = inst->GetArg(0);
    reg_alloc.RegisterAddDef(inst, source);
}
|
|
|
|
|
2016-08-06 22:03:57 +02:00
|
|
|
// MostSignificantWord: shifts the 64-bit argument right by 32 so the upper
// word ends up in the low half of the result.
//
// If a GetCarryFromOp pseudo-operation is attached, it is erased from the
// block and its value is defined from the host carry flag left by the shift
// (the last bit shifted out, i.e. bit 31 of the original value).
void EmitX64::EmitMostSignificantWord(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    Xbyak::Reg64 upper_word = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
    code->shr(upper_word, 32);

    if (!carry_inst) {
        return;
    }

    EraseInstruction(block, carry_inst);
    reg_alloc.DecrementRemainingUses(inst);

    Xbyak::Reg64 carry_out = reg_alloc.DefGpr(carry_inst);

    // NOTE(review): setc writes only the low 8 bits of the freshly defined
    // register; confirm consumers do not depend on the upper bits being zero.
    code->setc(carry_out.cvt8());
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// LeastSignificantHalf: emits no host code. `inst` is registered with the
// allocator as an additional definition of argument 0.
void EmitX64::EmitLeastSignificantHalf(IR::Block&, IR::Inst* inst) {
    auto source = inst->GetArg(0);
    reg_alloc.RegisterAddDef(inst, source);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// LeastSignificantByte: emits no host code. `inst` is registered with the
// allocator as an additional definition of argument 0.
void EmitX64::EmitLeastSignificantByte(IR::Block&, IR::Inst* inst) {
    auto source = inst->GetArg(0);
    reg_alloc.RegisterAddDef(inst, source);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// MostSignificantBit: result = arg0 >> 31 on the 32-bit view, leaving bit 31
// of the argument in bit 0 of the result.
void EmitX64::EmitMostSignificantBit(IR::Block&, IR::Inst* inst) {
    constexpr int top_bit = 31;

    const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

    // TODO: Flag optimization

    code->shr(value, top_bit);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// IsZero: result = (arg0 == 0) ? 1 : 0 for a 32-bit argument.
// test sets ZF, sete materialises it in the low byte, and movzx clears the
// remaining bits of the register.
void EmitX64::EmitIsZero(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
    const Xbyak::Reg8 low_byte = value.cvt8();

    // TODO: Flag optimization

    code->test(value, value);
    code->sete(low_byte);
    code->movzx(value, low_byte);
}
|
|
|
|
|
2016-08-04 23:04:42 +02:00
|
|
|
// IsZero64: result = (arg0 == 0) ? 1 : 0 for a 64-bit argument.
// test sets ZF, sete materialises it in the low byte, and movzx clears the
// remaining bits of the register.
void EmitX64::EmitIsZero64(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg64 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
    const Xbyak::Reg8 low_byte = value.cvt8();

    // TODO: Flag optimization

    code->test(value, value);
    code->sete(low_byte);
    code->movzx(value, low_byte);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// LogicalShiftLeft: result = arg0 << arg1 (32-bit), with shift amounts of 32
// or more producing zero. Argument 2 is the carry-in.
//
// When a GetCarryFromOp pseudo-operation is attached, it is erased from the
// block and the carry-out is computed as well:
//   shift == 0  -> carry-out = carry-in, result unchanged
//   shift <  32 -> carry-out = last bit shifted out
//   shift == 32 -> carry-out = bit 0 of the operand, result = 0
//   shift >  32 -> carry-out = 0, result = 0
void EmitX64::EmitLogicalShiftLeft(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    // TODO: Consider using BMI2 instructions like SHLX when arm-in-host flags is implemented.

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        reg_alloc.DecrementRemainingUses(inst);

        auto amount_arg = inst->GetArg(1);

        if (!amount_arg.IsImmediate()) {
            Xbyak::Reg8 amount = reg_alloc.UseGpr(amount_arg, {HostLoc::RCX}).cvt8();
            Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            Xbyak::Reg32 carry_bit = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32();

            // TODO: Optimize this.

            code->inLocalLabel();

            code->cmp(amount, 32);
            code->ja(".Rs_gt32");
            code->je(".Rs_eq32");
            // if (Rs & 0xFF < 32) {
            code->bt(carry_bit.cvt32(), 0); // Set the carry flag for correct behaviour in the case when Rs & 0xFF == 0
            code->shl(value, amount);
            code->setc(carry_bit.cvt8());
            code->jmp(".end");
            // } else if (Rs & 0xFF > 32) {
            code->L(".Rs_gt32");
            code->xor_(value, value);
            code->xor_(carry_bit, carry_bit);
            code->jmp(".end");
            // } else if (Rs & 0xFF == 32) {
            code->L(".Rs_eq32");
            code->mov(carry_bit, value);
            code->and_(carry_bit, 1);
            code->xor_(value, value);
            // }
            code->L(".end");

            code->outLocalLabel();
        } else {
            u8 amount = amount_arg.GetU8();
            Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            Xbyak::Reg32 carry_bit = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32();

            if (amount == 0) {
                // Nothing to emit: result and carry pass through unchanged.
            } else if (amount < 32) {
                // Seed the host carry flag with the carry-in, then let the
                // shift replace it with the last bit shifted out.
                code->bt(carry_bit.cvt32(), 0);
                code->shl(value, amount);
                code->setc(carry_bit.cvt8());
            } else if (amount == 32) {
                // Carry-out is bit 0 of the operand; the result is zero.
                code->mov(carry_bit, value);
                code->xor_(value, value);
                code->and_(carry_bit, 1);
            } else {
                // Everything is shifted out: zero result, zero carry.
                code->xor_(value, value);
                code->xor_(carry_bit, carry_bit);
            }
        }
    } else {
        if (!inst->GetArg(2).IsImmediate()) {
            // TODO: Remove redundant argument.
            reg_alloc.DecrementRemainingUses(inst->GetArg(2).GetInst());
        }

        auto amount_arg = inst->GetArg(1);

        if (!amount_arg.IsImmediate()) {
            Xbyak::Reg8 amount = reg_alloc.UseGpr(amount_arg, {HostLoc::RCX}).cvt8();
            Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32();

            // The 32-bit x64 SHL instruction masks the shift count by 0x1F before performing the shift.
            // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros.

            code->shl(value, amount);
            code->xor_(zero, zero);
            code->cmp(amount, 32);
            code->cmovnb(value, zero);
        } else {
            Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            u8 amount = amount_arg.GetU8();

            if (amount > 31) {
                code->xor_(value, value);
            } else {
                code->shl(value, amount);
            }
        }
    }
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
void EmitX64::EmitLogicalShiftRight(IR::Block& block, IR::Inst* inst) {
|
2016-08-25 22:08:47 +02:00
|
|
|
auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
|
2016-07-01 15:01:06 +02:00
|
|
|
|
|
|
|
if (!carry_inst) {
|
2016-07-23 00:55:00 +02:00
|
|
|
if (!inst->GetArg(2).IsImmediate()) {
|
|
|
|
// TODO: Remove redundant argument.
|
|
|
|
reg_alloc.DecrementRemainingUses(inst->GetArg(2).GetInst());
|
|
|
|
}
|
|
|
|
|
|
|
|
auto shift_arg = inst->GetArg(1);
|
|
|
|
|
|
|
|
if (shift_arg.IsImmediate()) {
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
|
2016-07-23 00:55:00 +02:00
|
|
|
u8 shift = shift_arg.GetU8();
|
|
|
|
|
|
|
|
if (shift <= 31) {
|
2016-08-24 21:07:08 +02:00
|
|
|
code->shr(result, shift);
|
2016-07-23 00:55:00 +02:00
|
|
|
} else {
|
2016-08-24 21:07:08 +02:00
|
|
|
code->xor_(result, result);
|
2016-07-23 00:55:00 +02:00
|
|
|
}
|
|
|
|
} else {
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8();
|
|
|
|
Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
|
|
|
|
Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32();
|
2016-07-23 00:55:00 +02:00
|
|
|
|
|
|
|
// The 32-bit x64 SHR instruction masks the shift count by 0x1F before performing the shift.
|
|
|
|
// ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros.
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
code->shr(result, shift);
|
|
|
|
code->xor_(zero, zero);
|
|
|
|
code->cmp(shift, 32);
|
|
|
|
code->cmovnb(result, zero);
|
2016-07-23 00:55:00 +02:00
|
|
|
}
|
2016-07-01 15:01:06 +02:00
|
|
|
} else {
|
2016-07-23 00:55:00 +02:00
|
|
|
EraseInstruction(block, carry_inst);
|
|
|
|
reg_alloc.DecrementRemainingUses(inst);
|
|
|
|
|
|
|
|
auto shift_arg = inst->GetArg(1);
|
|
|
|
|
|
|
|
if (shift_arg.IsImmediate()) {
|
|
|
|
u8 shift = shift_arg.GetU8();
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
|
|
|
|
Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32();
|
2016-07-23 00:55:00 +02:00
|
|
|
|
|
|
|
if (shift == 0) {
|
|
|
|
// There is nothing more to do.
|
|
|
|
} else if (shift < 32) {
|
2016-08-24 21:07:08 +02:00
|
|
|
code->shr(result, shift);
|
|
|
|
code->setc(carry.cvt8());
|
2016-07-23 00:55:00 +02:00
|
|
|
} else if (shift == 32) {
|
2016-08-24 21:07:08 +02:00
|
|
|
code->bt(result, 31);
|
|
|
|
code->setc(carry.cvt8());
|
|
|
|
code->mov(result, 0);
|
2016-07-23 00:55:00 +02:00
|
|
|
} else {
|
2016-08-24 21:07:08 +02:00
|
|
|
code->xor_(result, result);
|
|
|
|
code->xor_(carry, carry);
|
2016-07-23 00:55:00 +02:00
|
|
|
}
|
|
|
|
} else {
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8();
|
|
|
|
Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
|
|
|
|
Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32();
|
2016-07-23 00:55:00 +02:00
|
|
|
|
|
|
|
// TODO: Optimize this.
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
code->inLocalLabel();
|
|
|
|
|
|
|
|
code->cmp(shift, 32);
|
|
|
|
code->ja(".Rs_gt32");
|
|
|
|
code->je(".Rs_eq32");
|
2016-07-23 00:55:00 +02:00
|
|
|
// if (Rs & 0xFF == 0) goto end;
|
2016-08-24 21:07:08 +02:00
|
|
|
code->test(shift, shift);
|
|
|
|
code->jz(".end");
|
2016-07-23 00:55:00 +02:00
|
|
|
// if (Rs & 0xFF < 32) {
|
2016-08-24 21:07:08 +02:00
|
|
|
code->shr(result, shift);
|
|
|
|
code->setc(carry.cvt8());
|
|
|
|
code->jmp(".end");
|
2016-07-23 00:55:00 +02:00
|
|
|
// } else if (Rs & 0xFF > 32) {
|
2016-08-24 21:07:08 +02:00
|
|
|
code->L(".Rs_gt32");
|
|
|
|
code->xor_(result, result);
|
|
|
|
code->xor_(carry, carry);
|
|
|
|
code->jmp(".end");
|
2016-07-23 00:55:00 +02:00
|
|
|
// } else if (Rs & 0xFF == 32) {
|
2016-08-24 21:07:08 +02:00
|
|
|
code->L(".Rs_eq32");
|
|
|
|
code->bt(result, 31);
|
|
|
|
code->setc(carry.cvt8());
|
|
|
|
code->xor_(result, result);
|
2016-07-23 00:55:00 +02:00
|
|
|
// }
|
2016-08-24 21:07:08 +02:00
|
|
|
code->L(".end");
|
|
|
|
|
|
|
|
code->outLocalLabel();
|
2016-07-23 00:55:00 +02:00
|
|
|
}
|
2016-07-01 15:01:06 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-07 15:23:33 +02:00
|
|
|
void EmitX64::EmitLogicalShiftRight64(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
|
2016-08-07 15:23:33 +02:00
|
|
|
|
|
|
|
auto shift_arg = inst->GetArg(1);
|
|
|
|
ASSERT_MSG(shift_arg.IsImmediate(), "variable 64 bit shifts are not implemented");
|
|
|
|
u8 shift = shift_arg.GetU8();
|
|
|
|
ASSERT_MSG(shift < 64, "shift width clamping is not implemented");
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
code->shr(result.cvt64(), shift);
|
2016-08-07 15:23:33 +02:00
|
|
|
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 32-bit arithmetic shift right of arg 0 by arg 1.
// When a GetCarryFromOp pseudo-operation is attached to `inst`, the carry-out is
// produced as well (its register is seeded from arg 2) and the pseudo-operation
// is erased from `block`.
void EmitX64::EmitArithmeticShiftRight(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        reg_alloc.DecrementRemainingUses(inst);

        auto amount_arg = inst->GetArg(1);

        if (!amount_arg.IsImmediate()) {
            Xbyak::Reg8 amount = reg_alloc.UseGpr(amount_arg, {HostLoc::RCX}).cvt8();
            Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            Xbyak::Reg8 carry_out = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8();

            // TODO: Optimize this.

            code->inLocalLabel();

            code->cmp(amount, u32(31));
            code->ja(".Rs_gt31");

            // A shift amount of zero leaves both the value and the carry untouched.
            code->test(amount, amount);
            code->jz(".end");

            // 1 <= amount <= 31: SAR leaves the last bit shifted out in CF.
            code->sar(value, amount);
            code->setc(carry_out);
            code->jmp(".end");

            // amount > 31: shifting by 31 gives the same value as any larger amount;
            // the carry is then read back from bit 31 of the shifted value.
            code->L(".Rs_gt31");
            code->sar(value, 31);
            code->bt(value, 31);
            code->setc(carry_out);

            code->L(".end");

            code->outLocalLabel();
            return;
        }

        u8 amount = amount_arg.GetU8();
        Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        Xbyak::Reg8 carry_out = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8();

        // An immediate amount of zero requires no code at all.
        if (amount != 0) {
            if (amount > 31) {
                code->sar(value, 31);
                code->bt(value, 31);
            } else {
                code->sar(value, amount);
            }
            code->setc(carry_out);
        }
        return;
    }

    // No carry-out is wanted from here on.
    if (!inst->GetArg(2).IsImmediate()) {
        // TODO: Remove redundant argument.
        reg_alloc.DecrementRemainingUses(inst->GetArg(2).GetInst());
    }

    auto amount_arg = inst->GetArg(1);

    if (!amount_arg.IsImmediate()) {
        Xbyak::Reg32 amount = reg_alloc.UseScratchGpr(amount_arg, {HostLoc::RCX}).cvt32();
        Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        Xbyak::Reg32 saturation = reg_alloc.ScratchGpr().cvt32();

        // The 32-bit x64 SAR instruction masks its count with 0x1F, whereas ARM does not
        // mask the count. Every amount above 31 behaves exactly like 31, so the amount is
        // saturated to 31 before shifting.

        code->mov(saturation, 31);
        code->movzx(amount, amount.cvt8());
        code->cmp(amount, u32(31));
        code->cmovg(amount, saturation);
        code->sar(value, amount.cvt8());
        return;
    }

    u8 amount = amount_arg.GetU8();
    Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

    // Immediate amounts are clamped to 31 at emit time.
    const u8 clamped = u8(amount > 31 ? 31 : amount);
    code->sar(value, clamped);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 32-bit rotate right of arg 0 by arg 1.
// When a GetCarryFromOp pseudo-operation is attached to `inst`, the carry-out is
// produced as well (its register is seeded from arg 2) and the pseudo-operation
// is erased from `block`.
void EmitX64::EmitRotateRight(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        reg_alloc.DecrementRemainingUses(inst);

        auto amount_arg = inst->GetArg(1);

        if (!amount_arg.IsImmediate()) {
            Xbyak::Reg8 amount = reg_alloc.UseScratchGpr(amount_arg, {HostLoc::RCX}).cvt8();
            Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            Xbyak::Reg8 carry_out = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8();

            // TODO: Optimize

            code->inLocalLabel();

            // A rotate amount of zero (low byte) leaves value and carry untouched.
            code->test(amount, amount);
            code->jz(".end");

            // Reduce the amount modulo 32; AND sets ZF when the reduced amount is zero.
            code->and_(amount.cvt32(), u32(0x1F));
            code->jz(".zero_1F");

            // Reduced amount is non-zero: rotate and take the carry from CF.
            code->ror(value, amount);
            code->setc(carry_out);
            code->jmp(".end");

            // Non-zero multiple of 32: value is unchanged, carry is bit 31 of the value.
            code->L(".zero_1F");
            code->bt(value, u8(31));
            code->setc(carry_out);

            code->L(".end");

            code->outLocalLabel();
            return;
        }

        u8 amount = amount_arg.GetU8();
        Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        Xbyak::Reg8 carry_out = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8();

        // An immediate amount of zero requires no code at all.
        if (amount != 0) {
            if ((amount & 0x1F) != 0) {
                code->ror(value, amount);
            } else {
                // Non-zero multiple of 32: only the carry needs computing, from bit 31.
                code->bt(value, u8(31));
            }
            code->setc(carry_out);
        }
        return;
    }

    // No carry-out is wanted from here on.
    if (!inst->GetArg(2).IsImmediate()) {
        // TODO: Remove redundant argument.
        reg_alloc.DecrementRemainingUses(inst->GetArg(2).GetInst());
    }

    auto amount_arg = inst->GetArg(1);

    if (!amount_arg.IsImmediate()) {
        Xbyak::Reg8 amount = reg_alloc.UseGpr(amount_arg, {HostLoc::RCX}).cvt8();
        Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

        // The x64 ROR instruction already applies (amount & 0x1F) itself.
        code->ror(value, amount);
        return;
    }

    u8 amount = amount_arg.GetU8();
    Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

    const u8 reduced = u8(amount & 0x1F);
    code->ror(value, reduced);
}
|
|
|
|
|
2016-07-31 20:07:35 +02:00
|
|
|
// Emits a one-bit rotate right through carry: bit 0 of arg 1 is loaded into CF and
// arg 0 is rotated with RCR by 1. When a GetCarryFromOp pseudo-operation is attached
// to `inst`, the resulting CF is stored as the carry-out and the pseudo-operation is
// erased from `block`.
void EmitX64::EmitRotateRightExtended(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
    // The carry register doubles as the carry-out definition only when one is requested.
    Xbyak::Reg8 carry_reg = carry_inst ? reg_alloc.UseDefGpr(inst->GetArg(1), carry_inst).cvt8()
                                       : reg_alloc.UseGpr(inst->GetArg(1)).cvt8();

    code->bt(carry_reg.cvt32(), 0);
    code->rcr(value, 1);

    if (!carry_inst) {
        return;
    }

    EraseInstruction(block, carry_inst);
    reg_alloc.DecrementRemainingUses(inst);
    code->setc(carry_reg);
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Placeholder register (constructed with index -1) handed back where no real register
// is allocated, e.g. by DoCarry when there is neither a carry-in register nor a
// carry-out instruction, and for the overflow register when no overflow is requested.
const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1);
|
|
|
|
|
|
|
|
// Picks the 8-bit register used for the carry of an add/sub-with-carry emission.
//  - carry_in in a register, carry_out requested: UseDefGpr(carry_in, carry_out)
//  - carry_in in a register, no carry_out:        UseGpr(carry_in)
//  - carry_in immediate, carry_out requested:     DefGpr(carry_out)
//  - carry_in immediate, no carry_out:            INVALID_REG (no register allocated)
static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, const IR::Value& carry_in, IR::Inst* carry_out) {
    if (!carry_in.IsImmediate()) {
        if (carry_out) {
            return reg_alloc.UseDefGpr(carry_in, carry_out).cvt8();
        }
        return reg_alloc.UseGpr(carry_in).cvt8();
    }

    if (carry_out) {
        return reg_alloc.DefGpr(carry_out).cvt8();
    }
    return INVALID_REG.cvt8();
}
|
|
|
|
|
|
|
|
// Emits a 32-bit add of arg 0 and arg 1 with the carry-in given by arg 2.
// Attached GetCarryFromOp / GetOverflowFromOp pseudo-operations are erased from
// `block` and their values are captured from the host CF / OF after the addition.
void EmitX64::EmitAddWithCarry(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);
    IR::Value carry_in = inst->GetArg(2);

    Xbyak::Reg32 result = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst);
    Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt8() : INVALID_REG.cvt8();

    // TODO: Consider using LEA.

    const bool carry_known = carry_in.IsImmediate();
    const bool carry_set = carry_known && carry_in.GetU1();

    // Puts the incoming carry into CF when it can be non-zero.
    // Returns true when ADC must be used, false when a plain ADD suffices.
    const auto load_carry_flag = [&]() -> bool {
        if (!carry_known) {
            code->bt(carry.cvt32(), 0);
            return true;
        }
        if (carry_set) {
            code->stc();
            return true;
        }
        return false;
    };

    if (rhs.IsImmediate()) {
        u32 imm = rhs.GetU32();
        if (load_carry_flag()) {
            code->adc(result, imm);
        } else {
            code->add(result, imm);
        }
    } else {
        OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
        operand.setBit(32);
        if (load_carry_flag()) {
            code->adc(result, *operand);
        } else {
            code->add(result, *operand);
        }
    }

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        reg_alloc.DecrementRemainingUses(inst);
        code->setc(carry);
    }
    if (overflow_inst) {
        EraseInstruction(block, overflow_inst);
        reg_alloc.DecrementRemainingUses(inst);
        code->seto(overflow);
    }
}
|
|
|
|
|
2016-08-04 23:04:42 +02:00
|
|
|
void EmitX64::EmitAdd64(IR::Block& block, IR::Inst* inst) {
|
|
|
|
IR::Value a = inst->GetArg(0);
|
|
|
|
IR::Value b = inst->GetArg(1);
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg64 result = reg_alloc.UseDefGpr(a, inst);
|
|
|
|
Xbyak::Reg64 op_arg = reg_alloc.UseGpr(b);
|
2016-08-04 23:04:42 +02:00
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
code->add(result, op_arg);
|
2016-08-04 23:04:42 +02:00
|
|
|
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 32-bit subtract of arg 1 from arg 0 with the carry-in given by arg 2.
// Attached GetCarryFromOp / GetOverflowFromOp pseudo-operations are erased from
// `block`; the carry-out is the inverse of the host CF and the overflow is the host OF.
void EmitX64::EmitSubWithCarry(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);
    IR::Value carry_in = inst->GetArg(2);

    Xbyak::Reg32 result = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst);
    Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt8() : INVALID_REG.cvt8();

    // TODO: Consider using LEA.
    // TODO: Optimize CMP case.
    // Note that x64 CF is inverse of what the ARM carry flag is here.

    const bool carry_known = carry_in.IsImmediate();
    const bool carry_set = carry_known && carry_in.GetU1();

    // Puts the inverted incoming carry (i.e. the borrow) into CF when it can be non-zero.
    // Returns true when SBB must be used, false when a plain SUB suffices.
    const auto load_borrow_flag = [&]() -> bool {
        if (!carry_known) {
            code->bt(carry.cvt32(), 0);
            code->cmc();
            return true;
        }
        if (!carry_set) {
            code->stc();
            return true;
        }
        return false;
    };

    if (rhs.IsImmediate()) {
        u32 imm = rhs.GetU32();
        if (load_borrow_flag()) {
            code->sbb(result, imm);
        } else {
            code->sub(result, imm);
        }
    } else {
        OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
        operand.setBit(32);
        if (load_borrow_flag()) {
            code->sbb(result, *operand);
        } else {
            code->sub(result, *operand);
        }
    }

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        reg_alloc.DecrementRemainingUses(inst);
        code->setnc(carry);
    }
    if (overflow_inst) {
        EraseInstruction(block, overflow_inst);
        reg_alloc.DecrementRemainingUses(inst);
        code->seto(overflow);
    }
}
|
|
|
|
|
2016-08-06 07:09:47 +02:00
|
|
|
void EmitX64::EmitSub64(IR::Block& block, IR::Inst* inst) {
|
|
|
|
IR::Value a = inst->GetArg(0);
|
|
|
|
IR::Value b = inst->GetArg(1);
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg64 result = reg_alloc.UseDefGpr(a, inst);
|
|
|
|
Xbyak::Reg64 op_arg = reg_alloc.UseGpr(b);
|
2016-08-06 07:09:47 +02:00
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
code->sub(result, op_arg);
|
2016-08-06 07:09:47 +02:00
|
|
|
}
|
|
|
|
|
2016-08-04 23:04:42 +02:00
|
|
|
// Emits a 32-bit multiply of arg 0 and arg 1 using IMUL.
void EmitX64::EmitMul(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    // If the first argument is an immediate, exchange the operands so that the
    // immediate (if any) ends up on the right-hand side.
    if (lhs.IsImmediate()) {
        std::swap(lhs, rhs);
    }

    Xbyak::Reg32 product = reg_alloc.UseDefGpr(lhs, inst).cvt32();

    if (!rhs.IsImmediate()) {
        OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
        operand.setBit(32);

        code->imul(product, *operand);
        return;
    }

    code->imul(product, product, rhs.GetU32());
}
|
|
|
|
|
|
|
|
// Emits a 64-bit multiply of arg 0 by arg 1 using the two-operand form of IMUL.
void EmitX64::EmitMul64(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg64 product = reg_alloc.UseDefGpr(lhs, inst);
    OpArg multiplier = reg_alloc.UseOpArg(rhs, any_gpr);

    code->imul(product, *multiplier);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 32-bit bitwise AND of arg 0 with arg 1 (immediate or register/memory operand).
void EmitX64::EmitAnd(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 dest = reg_alloc.UseDefGpr(lhs, inst).cvt32();

    if (!rhs.IsImmediate()) {
        OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
        operand.setBit(32);

        code->and_(dest, *operand);
        return;
    }

    u32 imm = rhs.GetU32();
    code->and_(dest, imm);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 32-bit bitwise exclusive OR of arg 0 with arg 1 (immediate or register/memory operand).
void EmitX64::EmitEor(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 dest = reg_alloc.UseDefGpr(lhs, inst).cvt32();

    if (!rhs.IsImmediate()) {
        OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
        operand.setBit(32);

        code->xor_(dest, *operand);
        return;
    }

    u32 imm = rhs.GetU32();
    code->xor_(dest, imm);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 32-bit bitwise OR of arg 0 with arg 1 (immediate or register/memory operand).
void EmitX64::EmitOr(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 dest = reg_alloc.UseDefGpr(lhs, inst).cvt32();

    if (!rhs.IsImmediate()) {
        OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
        operand.setBit(32);

        code->or_(dest, *operand);
        return;
    }

    u32 imm = rhs.GetU32();
    code->or_(dest, imm);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 32-bit bitwise NOT of arg 0.
// An immediate argument is inverted at emit time and loaded with a single MOV.
void EmitX64::EmitNot(IR::Block&, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);

    if (!operand.IsImmediate()) {
        Xbyak::Reg32 dest = reg_alloc.UseDefGpr(operand, inst).cvt32();

        code->not_(dest);
        return;
    }

    Xbyak::Reg32 dest = reg_alloc.DefGpr(inst).cvt32();

    code->mov(dest, u32(~operand.GetU32()));
}
|
2016-07-16 20:23:42 +02:00
|
|
|
|
2016-08-04 23:04:42 +02:00
|
|
|
// Emits a sign extension of the 32-bit value in arg 0 to 64 bits using MOVSXD.
void EmitX64::EmitSignExtendWordToLong(IR::Block&, IR::Inst* inst) {
    OpArg src;
    Xbyak::Reg64 dest;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // The immediate goes through the result register, which then also acts as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    src.setBit(32);
    code->movsxd(dest.cvt64(), *src);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a sign extension of the 16-bit value in arg 0 to 32 bits using MOVSX.
void EmitX64::EmitSignExtendHalfToWord(IR::Block&, IR::Inst* inst) {
    OpArg src;
    Xbyak::Reg64 dest;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // The immediate goes through the result register, which then also acts as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    src.setBit(16);
    code->movsx(dest.cvt32(), *src);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a sign extension of the 8-bit value in arg 0 to 32 bits using MOVSX.
void EmitX64::EmitSignExtendByteToWord(IR::Block&, IR::Inst* inst) {
    OpArg src;
    Xbyak::Reg64 dest;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // The immediate goes through the result register, which then also acts as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    src.setBit(8);
    code->movsx(dest.cvt32(), *src);
}
|
|
|
|
|
2016-08-04 23:04:42 +02:00
|
|
|
// Emits a zero extension of the 32-bit value in arg 0 to 64 bits using a 32-bit MOV.
void EmitX64::EmitZeroExtendWordToLong(IR::Block&, IR::Inst* inst) {
    OpArg src;
    Xbyak::Reg64 dest;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // The immediate goes through the result register, which then also acts as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    src.setBit(32);
    // A 32-bit move on x64 clears the upper 32 bits of the destination register.
    code->mov(dest.cvt32(), *src);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a zero extension of the 16-bit value in arg 0 to 32 bits using MOVZX.
void EmitX64::EmitZeroExtendHalfToWord(IR::Block&, IR::Inst* inst) {
    OpArg src;
    Xbyak::Reg64 dest;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // The immediate goes through the result register, which then also acts as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    src.setBit(16);
    code->movzx(dest.cvt32(), *src);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a zero extension of the 8-bit value in arg 0 to 32 bits using MOVZX.
void EmitX64::EmitZeroExtendByteToWord(IR::Block&, IR::Inst* inst) {
    OpArg src;
    Xbyak::Reg64 dest;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // The immediate goes through the result register, which then also acts as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    src.setBit(8);
    code->movzx(dest.cvt32(), *src);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a byte-order reversal of the 32-bit value in arg 0 using BSWAP.
void EmitX64::EmitByteReverseWord(IR::Block&, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);
    Xbyak::Reg32 value = reg_alloc.UseDefGpr(operand, inst).cvt32();

    code->bswap(value);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a swap of the two bytes of the 16-bit value in arg 0.
void EmitX64::EmitByteReverseHalf(IR::Block&, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);
    Xbyak::Reg16 value = reg_alloc.UseDefGpr(operand, inst).cvt16();

    // Rotating a 16-bit register by 8 exchanges its low and high bytes.
    code->rol(value, 8);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a byte-order reversal of the 64-bit value in arg 0 using BSWAP.
void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);
    Xbyak::Reg64 value = reg_alloc.UseDefGpr(operand, inst);

    code->bswap(value);
}
|
2016-07-16 20:23:42 +02:00
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Shared emitter for two-operand packed operations on 32-bit values.
// Both arguments are moved from general-purpose registers into scratch XMM registers,
// `fn` (a pointer to an Xbyak::CodeGenerator member) is applied to them, and the low
// 32 bits of the first XMM register are moved back into the result register.
static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 dest = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg32 rhs_gpr = reg_alloc.UseGpr(rhs).cvt32();

    Xbyak::Xmm lhs_xmm = reg_alloc.ScratchXmm();
    Xbyak::Xmm rhs_xmm = reg_alloc.ScratchXmm();

    // GPR -> XMM
    code->movd(lhs_xmm, dest);
    code->movd(rhs_xmm, rhs_gpr);

    // lhs_xmm = fn(lhs_xmm, rhs_xmm)
    (code->*fn)(lhs_xmm, rhs_xmm);

    // XMM -> GPR
    code->movd(dest, lhs_xmm);
}
|
|
|
|
|
2016-08-12 19:26:14 +02:00
|
|
|
void EmitX64::EmitPackedSaturatedAddU8(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusb);
|
2016-08-12 19:26:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void EmitX64::EmitPackedSaturatedAddS8(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddsb);
|
2016-08-12 19:26:14 +02:00
|
|
|
}
|
|
|
|
|
2016-08-12 19:18:38 +02:00
|
|
|
void EmitX64::EmitPackedSaturatedSubU8(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubusb);
|
2016-08-12 19:18:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void EmitX64::EmitPackedSaturatedSubS8(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsb);
|
2016-08-12 19:18:38 +02:00
|
|
|
}
|
|
|
|
|
2016-08-12 19:42:16 +02:00
|
|
|
void EmitX64::EmitPackedSaturatedAddU16(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusw);
|
2016-08-12 19:42:16 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void EmitX64::EmitPackedSaturatedAddS16(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddsw);
|
2016-08-12 19:42:16 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void EmitX64::EmitPackedSaturatedSubU16(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubusw);
|
2016-08-12 19:42:16 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void EmitX64::EmitPackedSaturatedSubS16(IR::Block& block, IR::Inst* inst) {
|
2016-08-24 21:07:08 +02:00
|
|
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
|
2016-08-12 19:42:16 +02:00
|
|
|
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits code that replaces a single-precision denormal in `xmm_value` with zero and
// records the event by storing (1 << 7) to JitState::FPSCR_IDC.
// `gpr_scratch` is clobbered.
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
    using namespace Xbyak::util;

    const u32 non_sign_mask = 0x7FFFFFFF;
    const u32 penultimate_denormal = 0x007FFFFE;
    const Xbyak::Address idc_flag = dword[r15 + offsetof(JitState, FPSCR_IDC)];
    Xbyak::Label not_denormal;

    // Whether a denormal was seen on input has to be reported back;
    // SSE does not do this for us when SSE's DAZ is enabled.

    // Clear the sign bit, then subtract one: magnitudes 0x00000001..0x007FFFFF land in
    // 0..0x007FFFFE, while zero wraps around to 0xFFFFFFFF and so fails the unsigned test.
    code->movd(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, non_sign_mask);
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, penultimate_denormal);
    code->ja(not_denormal);
    code->pxor(xmm_value, xmm_value);
    code->mov(idc_flag, u32(1 << 7));
    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
// 64-bit counterpart of DenormalsAreZero32: emits code that zeroes `xmm_value` and stores
// (1 << 7) to JitState::FPSCR_IDC when the range check below passes.
// The mask and upper bound are memory constants supplied by BlockOfCode.
// `gpr_scratch` is clobbered.
static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
    using namespace Xbyak::util;

    Xbyak::Label not_denormal;

    auto non_sign_mask = code->MFloatNonSignMask64();
    non_sign_mask.setBit(64);
    auto upper_bound = code->MFloatPenultimatePositiveDenormal64();
    upper_bound.setBit(64);

    const Xbyak::Address idc_flag = dword[r15 + offsetof(JitState, FPSCR_IDC)];

    // Mask, subtract one, then do an unsigned compare against the upper bound;
    // a masked value of zero wraps around and therefore skips the flush.
    code->movq(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, non_sign_mask);
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, upper_bound);
    code->ja(not_denormal);
    code->pxor(xmm_value, xmm_value);
    code->mov(idc_flag, u32(1 << 7));
    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
// Emits code that replaces a single-precision denormal in `xmm_value` with zero and
// records the event by storing (1 << 3) to JitState::FPSCR_UFC.
// `gpr_scratch` is clobbered.
static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
    using namespace Xbyak::util;

    const u32 non_sign_mask = 0x7FFFFFFF;
    const u32 penultimate_denormal = 0x007FFFFE;
    const Xbyak::Address ufc_flag = dword[r15 + offsetof(JitState, FPSCR_UFC)];
    Xbyak::Label not_denormal;

    // Clear the sign bit, then subtract one: magnitudes 0x00000001..0x007FFFFF land in
    // 0..0x007FFFFE, while zero wraps around to 0xFFFFFFFF and so fails the unsigned test.
    code->movd(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, non_sign_mask);
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, penultimate_denormal);
    code->ja(not_denormal);
    code->pxor(xmm_value, xmm_value);
    code->mov(ufc_flag, u32(1 << 3));
    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
// Double-precision counterpart of FlushToZero32: emits code that zeroes a
// denormal result in `xmm_value` and stores (1 << 3) into JitState::FPSCR_UFC
// when it does so. `gpr_scratch` is clobbered.
static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
    using namespace Xbyak::util;

    // Both constants are fetched from the code block and accessed as 64-bit operands.
    auto non_sign_mask = code->MFloatNonSignMask64();
    non_sign_mask.setBit(64);
    auto penultimate_denormal = code->MFloatPenultimatePositiveDenormal64();
    penultimate_denormal.setBit(64);

    Xbyak::Label not_denormal;

    // Drop the sign, subtract one, and compare unsigned against the penultimate
    // denormal: only non-zero denormal encodings fall through.
    code->movq(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, non_sign_mask);
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, penultimate_denormal);
    code->ja(not_denormal);

    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_UFC)], u32(1 << 3));

    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
// Emits code that overwrites `xmm_value` with the constant returned by
// BlockOfCode::MFloatNaN32() whenever the single-precision value is a NaN.
static void DefaultNaN32(BlockOfCode* code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // Comparing a value with itself is unordered (parity flag set) only for NaN.
    code->ucomiss(xmm_value, xmm_value);
    code->jnp(not_nan);

    code->movaps(xmm_value, code->MFloatNaN32());

    code->L(not_nan);
}
|
|
|
|
|
|
|
|
// Emits code that overwrites `xmm_value` with the constant returned by
// BlockOfCode::MFloatNaN64() whenever the double-precision value is a NaN.
static void DefaultNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // Comparing a value with itself is unordered (parity flag set) only for NaN.
    code->ucomisd(xmm_value, xmm_value);
    code->jnp(not_nan);

    code->movaps(xmm_value, code->MFloatNaN64());

    code->L(not_nan);
}
|
|
|
|
|
2016-08-26 16:23:08 +02:00
|
|
|
// Emits branch-free code that turns a double-precision NaN in `xmm_value` into
// zero and leaves every other value untouched. `xmm_scratch` is clobbered.
static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
    // Build an all-ones mask when the value is ordered (i.e. not a NaN) ...
    code->pxor(xmm_scratch, xmm_scratch);
    code->cmpordsd(xmm_scratch, xmm_value);

    // ... and AND it in, which clears the value only in the NaN case.
    code->pand(xmm_value, xmm_scratch);
}
|
|
|
|
|
|
|
|
// Emits a single-precision binary operation: the instruction `fn` is applied
// with argument 0 as destination/first operand and argument 1 as second operand.
//
// Depending on the block's FPSCR:
//  - FTZ: denormal inputs are zeroed before the operation and a denormal result
//         is zeroed after it.
//  - DN:  a NaN result is replaced by the default NaN.
static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Xmm accumulator = reg_alloc.UseDefXmm(lhs, inst);
    Xbyak::Xmm rhs_xmm = reg_alloc.UseXmm(rhs);
    Xbyak::Reg32 scratch = reg_alloc.ScratchGpr().cvt32();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero32(code, accumulator, scratch);
        DenormalsAreZero32(code, rhs_xmm, scratch);
    }

    (code->*fn)(accumulator, rhs_xmm);

    if (flush_to_zero) {
        FlushToZero32(code, accumulator, scratch);
    }
    if (default_nan) {
        DefaultNaN32(code, accumulator);
    }
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits a double-precision binary operation: the instruction `fn` is applied
// with argument 0 as destination/first operand and argument 1 as second operand.
//
// Depending on the block's FPSCR:
//  - FTZ: denormal inputs are zeroed before the operation and a denormal result
//         is zeroed after it.
//  - DN:  a NaN result is replaced by the default NaN.
static void FPThreeOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Xmm accumulator = reg_alloc.UseDefXmm(lhs, inst);
    Xbyak::Xmm rhs_xmm = reg_alloc.UseXmm(rhs);
    Xbyak::Reg64 scratch = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero64(code, accumulator, scratch);
        DenormalsAreZero64(code, rhs_xmm, scratch);
    }

    (code->*fn)(accumulator, rhs_xmm);

    if (flush_to_zero) {
        FlushToZero64(code, accumulator, scratch);
    }
    if (default_nan) {
        DefaultNaN64(code, accumulator);
    }
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits a single-precision unary operation: the instruction `fn` is applied
// in place to the register holding argument 0.
//
// Depending on the block's FPSCR:
//  - FTZ: a denormal input is zeroed before the operation and a denormal result
//         is zeroed after it.
//  - DN:  a NaN result is replaced by the default NaN.
static void FPTwoOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    IR::Value input = inst->GetArg(0);

    Xbyak::Xmm value = reg_alloc.UseDefXmm(input, inst);
    Xbyak::Reg32 scratch = reg_alloc.ScratchGpr().cvt32();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero32(code, value, scratch);
    }

    (code->*fn)(value, value);

    if (flush_to_zero) {
        FlushToZero32(code, value, scratch);
    }
    if (default_nan) {
        DefaultNaN32(code, value);
    }
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits a double-precision unary operation: the instruction `fn` is applied
// in place to the register holding argument 0.
//
// Depending on the block's FPSCR:
//  - FTZ: a denormal input is zeroed before the operation and a denormal result
//         is zeroed after it.
//  - DN:  a NaN result is replaced by the default NaN.
static void FPTwoOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    IR::Value input = inst->GetArg(0);

    Xbyak::Xmm value = reg_alloc.UseDefXmm(input, inst);
    Xbyak::Reg64 scratch = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero64(code, value, scratch);
    }

    (code->*fn)(value, value);

    if (flush_to_zero) {
        FlushToZero64(code, value, scratch);
    }
    if (default_nan) {
        DefaultNaN64(code, value);
    }
}
|
|
|
|
|
2016-08-07 20:25:12 +02:00
|
|
|
// Copies the low 32 bits of the XMM register holding argument 0 into a freshly
// defined general-purpose register.
void EmitX64::EmitTransferFromFP32(IR::Block& block, IR::Inst* inst) {
    Xbyak::Reg32 dest_gpr = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Xmm src_xmm = reg_alloc.UseXmm(inst->GetArg(0));

    // TODO: Eliminate this.
    code->movd(dest_gpr, src_xmm);
}
|
|
|
|
|
|
|
|
// Copies the low 64 bits of the XMM register holding argument 0 into a freshly
// defined general-purpose register.
void EmitX64::EmitTransferFromFP64(IR::Block& block, IR::Inst* inst) {
    Xbyak::Reg64 dest_gpr = reg_alloc.DefGpr(inst);
    Xbyak::Xmm src_xmm = reg_alloc.UseXmm(inst->GetArg(0));

    // TODO: Eliminate this.
    code->movq(dest_gpr, src_xmm);
}
|
|
|
|
|
|
|
|
// Copies the 32-bit general-purpose register holding argument 0 into a freshly
// defined XMM register.
void EmitX64::EmitTransferToFP32(IR::Block& block, IR::Inst* inst) {
    Xbyak::Xmm dest_xmm = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 src_gpr = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();

    // TODO: Eliminate this.
    code->movd(dest_xmm, src_gpr);
}
|
|
|
|
|
|
|
|
// Copies the 64-bit general-purpose register holding argument 0 into a freshly
// defined XMM register.
void EmitX64::EmitTransferToFP64(IR::Block& block, IR::Inst* inst) {
    Xbyak::Xmm dest_xmm = reg_alloc.DefXmm(inst);
    Xbyak::Reg64 src_gpr = reg_alloc.UseGpr(inst->GetArg(0));

    // TODO: Eliminate this.
    code->movq(dest_xmm, src_gpr);
}
|
|
|
|
|
2016-08-07 11:21:14 +02:00
|
|
|
// Single-precision absolute value: ANDs the operand in place with the constant
// returned by MFloatNonSignMask32().
void EmitX64::EmitFPAbs32(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    Xbyak::Xmm value = reg_alloc.UseDefXmm(input, inst);

    code->pand(value, code->MFloatNonSignMask32());
}
|
|
|
|
|
2016-08-07 11:21:14 +02:00
|
|
|
// Double-precision absolute value: ANDs the operand in place with the constant
// returned by MFloatNonSignMask64().
void EmitX64::EmitFPAbs64(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    Xbyak::Xmm value = reg_alloc.UseDefXmm(input, inst);

    code->pand(value, code->MFloatNonSignMask64());
}
|
|
|
|
|
2016-08-07 11:56:12 +02:00
|
|
|
// Single-precision negation: XORs the operand in place with the constant
// returned by MFloatNegativeZero32().
void EmitX64::EmitFPNeg32(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    Xbyak::Xmm value = reg_alloc.UseDefXmm(input, inst);

    code->pxor(value, code->MFloatNegativeZero32());
}
|
|
|
|
|
|
|
|
// Double-precision negation: XORs the operand in place with the constant
// returned by MFloatNegativeZero64().
void EmitX64::EmitFPNeg64(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    Xbyak::Xmm value = reg_alloc.UseDefXmm(input, inst);

    code->pxor(value, code->MFloatNegativeZero64());
}
|
|
|
|
|
2016-08-07 11:21:14 +02:00
|
|
|
// Single-precision addition, emitted through the shared binary-op helper with `addss`.
void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::addss;
    FPThreeOp32(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
|
|
|
// Double-precision addition, emitted through the shared binary-op helper with `addsd`.
void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::addsd;
    FPThreeOp64(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
2016-08-07 11:56:12 +02:00
|
|
|
// Single-precision division, emitted through the shared binary-op helper with `divss`.
void EmitX64::EmitFPDiv32(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::divss;
    FPThreeOp32(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
|
|
|
// Double-precision division, emitted through the shared binary-op helper with `divsd`.
void EmitX64::EmitFPDiv64(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::divsd;
    FPThreeOp64(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
2016-08-07 11:21:14 +02:00
|
|
|
// Single-precision multiplication, emitted through the shared binary-op helper with `mulss`.
void EmitX64::EmitFPMul32(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::mulss;
    FPThreeOp32(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
|
|
|
// Double-precision multiplication, emitted through the shared binary-op helper with `mulsd`.
void EmitX64::EmitFPMul64(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::mulsd;
    FPThreeOp64(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
|
|
|
// Single-precision square root, emitted through the shared unary-op helper with `sqrtss`.
void EmitX64::EmitFPSqrt32(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::sqrtss;
    FPTwoOp32(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
|
|
|
// Double-precision square root, emitted through the shared unary-op helper with `sqrtsd`.
void EmitX64::EmitFPSqrt64(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::sqrtsd;
    FPTwoOp64(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
|
|
|
// Single-precision subtraction, emitted through the shared binary-op helper with `subss`.
void EmitX64::EmitFPSub32(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::subss;
    FPThreeOp32(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
|
|
|
// Double-precision subtraction, emitted through the shared binary-op helper with `subsd`.
void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) {
    const auto sse_op = &Xbyak::CodeGenerator::subsd;
    FPThreeOp64(code, reg_alloc, block, inst, sse_op);
}
|
|
|
|
|
2016-08-23 23:04:46 +02:00
|
|
|
// Converts a single-precision value to double precision in place (`cvtss2sd`).
//
// With FPSCR.FTZ set, a denormal single-precision input is zeroed first and a
// denormal double-precision result is zeroed afterwards; with FPSCR.DN set, a
// NaN result is replaced by the default NaN.
void EmitX64::EmitFPSingleToDouble(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);

    Xbyak::Xmm value = reg_alloc.UseDefXmm(input, inst);
    Xbyak::Reg64 scratch = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero32(code, value, scratch.cvt32());
    }

    code->cvtss2sd(value, value);

    if (flush_to_zero) {
        FlushToZero64(code, value, scratch);
    }
    if (default_nan) {
        DefaultNaN64(code, value);
    }
}
|
|
|
|
|
|
|
|
// Converts a double-precision value to single precision in place (`cvtsd2ss`).
//
// With FPSCR.FTZ set, a denormal double-precision input is zeroed first and a
// denormal single-precision result is zeroed afterwards; with FPSCR.DN set, a
// NaN result is replaced by the default NaN.
void EmitX64::EmitFPDoubleToSingle(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);

    Xbyak::Xmm value = reg_alloc.UseDefXmm(input, inst);
    Xbyak::Reg64 scratch = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero64(code, value, scratch);
    }

    code->cvtsd2ss(value, value);

    if (flush_to_zero) {
        FlushToZero32(code, value, scratch.cvt32());
    }
    if (default_nan) {
        DefaultNaN32(code, value);
    }
}
|
|
|
|
|
|
|
|
// Converts a single-precision value to a signed 32-bit integer, leaving the
// integer in the low lane of a freshly defined XMM register.
//
// Argument 1 is an immediate flag selecting truncation (`cvttsd2si`) instead of
// the current rounding mode (`cvtsd2si`).
//
// ARM saturates on conversion; this differs from x64 which returns a sentinel
// value. Conversion to double is lossless, and allows for clamping.
void EmitX64::EmitFPSingleToS32(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_towards_zero = inst->GetArg(1).GetU1();

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(input);
    Xbyak::Xmm output = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 gpr = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_mask = reg_alloc.ScratchXmm();

    // Emits the double -> int32 conversion with the requested rounding behaviour.
    const auto emit_convert = [&] {
        if (round_towards_zero) {
            code->cvttsd2si(gpr, value); // 32 bit gpr
        } else {
            code->cvtsd2si(gpr, value); // 32 bit gpr
        }
    };

    if (block.Location().FPSCR().FTZ()) {
        DenormalsAreZero32(code, value, gpr);
    }
    code->cvtss2sd(value, value);

    // First time is to set flags
    emit_convert();

    // Clamp to output range
    ZeroIfNaN64(code, value, nan_mask);
    code->minsd(value, code->MFloatMaxS32());
    code->maxsd(value, code->MFloatMinS32());

    // Second time is for real
    emit_convert();

    code->movd(output, gpr);
}
|
|
|
|
|
|
|
|
// Converts a single-precision value to an unsigned 32-bit integer, leaving the
// integer in the low lane of a freshly defined XMM register.
//
// ARM saturates on conversion; this differs from x64 which returns a sentinel
// value. Conversion to double is lossless, and allows for accurate clamping.
//
// Since SSE2 doesn't provide an unsigned conversion, we shift the range as
// appropriate.
//
// FIXME: Inexact exception not correctly signalled with the below code
void EmitX64::EmitFPSingleToU32(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_towards_zero = inst->GetArg(1).GetU1();

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(input);
    Xbyak::Xmm output = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 gpr = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_mask = reg_alloc.ScratchXmm();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    // Truncation is needed when either the instruction or the FPSCR asks for it.
    const bool truncate = round_towards_zero
                       || block.Location().FPSCR().RMode() == Arm::FPSCR::RoundingMode::TowardsZero;

    if (!truncate) {
        if (flush_to_zero) {
            DenormalsAreZero32(code, value, gpr);
        }
        code->cvtss2sd(value, value);
        ZeroIfNaN64(code, value, nan_mask);

        // Bring into SSE range
        code->addsd(value, code->MFloatMinS32());
        // First time is to set flags
        code->cvtsd2si(gpr, value); // 32 bit gpr
        // Clamp to output range
        code->minsd(value, code->MFloatMaxS32());
        code->maxsd(value, code->MFloatMinS32());
        // Actually convert
        code->cvtsd2si(gpr, value); // 32 bit gpr
        // Bring back into original range
        code->add(gpr, u32(2147483648u));
        code->movd(output, gpr);
    } else {
        Xbyak::Xmm range_mask_xmm = reg_alloc.ScratchXmm();
        Xbyak::Reg32 range_mask_gpr = reg_alloc.ScratchGpr().cvt32();

        if (flush_to_zero) {
            DenormalsAreZero32(code, value, gpr);
        }
        code->cvtss2sd(value, value);
        ZeroIfNaN64(code, value, nan_mask);

        // Generate masks if out-of-signed-range
        code->movaps(range_mask_xmm, code->MFloatMaxS32());
        code->cmpltsd(range_mask_xmm, value);
        code->movd(range_mask_gpr, range_mask_xmm);
        code->pand(range_mask_xmm, code->MFloatMinS32());
        code->and_(range_mask_gpr, u32(2147483648u));

        // Bring into range if necessary
        code->addsd(value, range_mask_xmm);
        // First time is to set flags
        code->cvttsd2si(gpr, value); // 32 bit gpr
        // Clamp to output range
        code->minsd(value, code->MFloatMaxS32());
        code->maxsd(value, code->MFloatMinU32());
        // Actually convert
        code->cvttsd2si(gpr, value); // 32 bit gpr
        // Bring back into original range if necessary
        code->add(gpr, range_mask_gpr);
        code->movd(output, gpr);
    }
}
|
|
|
|
|
|
|
|
// Converts a double-precision value to a signed 32-bit integer, leaving the
// integer in the low lane of a freshly defined XMM register.
//
// Argument 1 is an immediate flag selecting truncation (`cvttsd2si`) instead of
// the current rounding mode (`cvtsd2si`).
//
// ARM saturates on conversion; this differs from x64 which returns a sentinel value.
void EmitX64::EmitFPDoubleToS32(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_towards_zero = inst->GetArg(1).GetU1();

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(input);
    Xbyak::Xmm output = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 gpr = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_mask = reg_alloc.ScratchXmm();

    // Emits the double -> int32 conversion with the requested rounding behaviour.
    const auto emit_convert = [&] {
        if (round_towards_zero) {
            code->cvttsd2si(gpr, value); // 32 bit gpr
        } else {
            code->cvtsd2si(gpr, value); // 32 bit gpr
        }
    };

    if (block.Location().FPSCR().FTZ()) {
        DenormalsAreZero64(code, value, gpr.cvt64());
    }

    // First time is to set flags
    emit_convert();

    // Clamp to output range
    ZeroIfNaN64(code, value, nan_mask);
    code->minsd(value, code->MFloatMaxS32());
    code->maxsd(value, code->MFloatMinS32());

    // Second time is for real
    emit_convert();

    code->movd(output, gpr);
}
|
|
|
|
|
|
|
|
// Converts a double-precision value to an unsigned 32-bit integer, leaving the
// integer in the low lane of a freshly defined XMM register.
//
// ARM saturates on conversion; this differs from x64 which returns a sentinel value.
// TODO: Use VCVTPD2UDQ when AVX512VL is available.
// FIXME: Inexact exception not correctly signalled with the below code
void EmitX64::EmitFPDoubleToU32(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_towards_zero = inst->GetArg(1).GetU1();

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(input);
    Xbyak::Xmm output = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 gpr = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_mask = reg_alloc.ScratchXmm();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    // Truncation is needed when either the instruction or the FPSCR asks for it.
    const bool truncate = round_towards_zero
                       || block.Location().FPSCR().RMode() == Arm::FPSCR::RoundingMode::TowardsZero;

    if (!truncate) {
        if (flush_to_zero) {
            DenormalsAreZero64(code, value, gpr.cvt64());
        }
        ZeroIfNaN64(code, value, nan_mask);

        // Bring into SSE range
        code->addsd(value, code->MFloatMinS32());
        // First time is to set flags
        code->cvtsd2si(gpr, value); // 32 bit gpr
        // Clamp to output range
        code->minsd(value, code->MFloatMaxS32());
        code->maxsd(value, code->MFloatMinS32());
        // Actually convert
        code->cvtsd2si(gpr, value); // 32 bit gpr
        // Bring back into original range
        code->add(gpr, u32(2147483648u));
        code->movd(output, gpr);
    } else {
        Xbyak::Xmm range_mask_xmm = reg_alloc.ScratchXmm();
        Xbyak::Reg32 range_mask_gpr = reg_alloc.ScratchGpr().cvt32();

        if (flush_to_zero) {
            DenormalsAreZero64(code, value, gpr.cvt64());
        }
        ZeroIfNaN64(code, value, nan_mask);

        // Generate masks if out-of-signed-range
        code->movaps(range_mask_xmm, code->MFloatMaxS32());
        code->cmpltsd(range_mask_xmm, value);
        code->movd(range_mask_gpr, range_mask_xmm);
        code->pand(range_mask_xmm, code->MFloatMinS32());
        code->and_(range_mask_gpr, u32(2147483648u));

        // Bring into range if necessary
        code->addsd(value, range_mask_xmm);
        // First time is to set flags
        code->cvttsd2si(gpr, value); // 32 bit gpr
        // Clamp to output range
        code->minsd(value, code->MFloatMaxS32());
        code->maxsd(value, code->MFloatMinU32());
        // Actually convert
        code->cvttsd2si(gpr, value); // 32 bit gpr
        // Bring back into original range if necessary
        code->add(gpr, range_mask_gpr);
        code->movd(output, gpr);
    }
}
|
|
|
|
|
|
|
|
// Converts the signed 32-bit integer held in the low lane of argument 0's XMM
// register to single precision (`cvtsi2ss`).
//
// Argument 1 (round-to-nearest) must be false; that mode is not implemented.
void EmitX64::EmitFPS32ToSingle(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    bool round_to_nearest = inst->GetArg(1).GetU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    Xbyak::Xmm source = reg_alloc.UseXmm(input);
    Xbyak::Xmm output = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 integer = reg_alloc.ScratchGpr().cvt32();

    // cvtsi2ss takes its integer operand from a GPR, so bounce through one.
    code->movd(integer, source);
    code->cvtsi2ss(output, integer);
}
|
|
|
|
|
|
|
|
// Converts the unsigned 32-bit integer held in the low lane of argument 0's XMM
// register to single precision.
//
// Argument 1 (round-to-nearest) must be false; that mode is not implemented.
void EmitX64::EmitFPU32ToSingle(IR::Block& block, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    bool round_to_nearest = inst->GetArg(1).GetU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    Xbyak::Xmm from = reg_alloc.UseXmm(a);
    Xbyak::Xmm to = reg_alloc.DefXmm(inst);
    Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr();

    // cvtsi2ss only performs *signed* conversions. With a 32-bit source operand
    // any input >= 2^31 would be read as negative, so the value is zero-extended
    // into a 64-bit register (a 32-bit register write clears the upper half) and
    // the 64-bit form of the instruction is used, where every u32 is non-negative.
    code->movd(gpr_scratch.cvt32(), from);
    code->cvtsi2ss(to, gpr_scratch);
}
|
|
|
|
|
|
|
|
// Converts the signed 32-bit integer held in the low lane of argument 0's XMM
// register to double precision (`cvtsi2sd`).
//
// Argument 1 (round-to-nearest) must be false; that mode is not implemented.
void EmitX64::EmitFPS32ToDouble(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    bool round_to_nearest = inst->GetArg(1).GetU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    Xbyak::Xmm source = reg_alloc.UseXmm(input);
    Xbyak::Xmm output = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 integer = reg_alloc.ScratchGpr().cvt32();

    // cvtsi2sd takes its integer operand from a GPR, so bounce through one.
    code->movd(integer, source);
    code->cvtsi2sd(output, integer);
}
|
|
|
|
|
|
|
|
// Converts the unsigned 32-bit integer held in the low lane of argument 0's XMM
// register to double precision.
//
// Argument 1 (round-to-nearest) must be false; that mode is not implemented.
void EmitX64::EmitFPU32ToDouble(IR::Block& block, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    bool round_to_nearest = inst->GetArg(1).GetU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    Xbyak::Xmm from = reg_alloc.UseXmm(a);
    Xbyak::Xmm to = reg_alloc.DefXmm(inst);
    Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr();

    // cvtsi2sd only performs *signed* conversions. With a 32-bit source operand
    // any input >= 2^31 would be read as negative, so the value is zero-extended
    // into a 64-bit register (a 32-bit register write clears the upper half) and
    // the 64-bit form of the instruction is used, where every u32 is non-negative.
    code->movd(gpr_scratch.cvt32(), from);
    code->cvtsi2sd(to, gpr_scratch);
}
|
|
|
|
|
|
|
|
|
TranslateArm: Implement CLREX, LDREX, LDREXB, LDREXD, LDREXH, STREX, STREXB, STREXD, STREXH, SWP, SWPB
2016-08-09 23:48:20 +02:00
|
|
|
// Emits a store of 0 to the byte-sized JitState::exclusive_state field.
void EmitX64::EmitClearExclusive(IR::Block&, IR::Inst*) {
    using namespace Xbyak::util;

    const Xbyak::Address exclusive_state_flag = code->byte[r15 + offsetof(JitState, exclusive_state)];
    code->mov(exclusive_state_flag, u8(0));
}
|
|
|
|
|
|
|
|
// Emits stores that set JitState::exclusive_state to 1 and record the 32-bit
// address from argument 0 in JitState::exclusive_address.
// Argument 1 must be an immediate; it is not otherwise used here.
void EmitX64::EmitSetExclusive(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    ASSERT(inst->GetArg(1).IsImmediate());
    Xbyak::Reg32 guest_address = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();

    const Xbyak::Address exclusive_state_flag = code->byte[r15 + offsetof(JitState, exclusive_state)];
    const Xbyak::Address exclusive_address_slot = dword[r15 + offsetof(JitState, exclusive_address)];

    code->mov(exclusive_state_flag, u8(1));
    code->mov(exclusive_address_slot, guest_address);
}
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
template <typename FunctionPointer>
|
|
|
|
static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
|
|
|
if (!cb.page_table) {
|
|
|
|
reg_alloc.HostCall(inst, inst->GetArg(0));
|
|
|
|
code->CallFunction(fn);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
using namespace Xbyak::util;
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
Xbyak::Reg64 result = reg_alloc.DefGpr(inst, { ABI_RETURN });
|
|
|
|
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
|
|
|
|
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
|
|
|
|
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
|
|
|
|
|
|
|
|
Xbyak::Label abort, end;
|
|
|
|
|
|
|
|
code->mov(rax, u64(cb.page_table));
|
|
|
|
code->mov(page_index.cvt32(), vaddr);
|
|
|
|
code->shr(page_index.cvt32(), 12);
|
|
|
|
code->mov(rax, qword[rax + page_index * 8]);
|
|
|
|
code->test(rax, rax);
|
|
|
|
code->jz(abort);
|
|
|
|
code->mov(page_offset.cvt32(), vaddr);
|
|
|
|
code->and_(page_offset.cvt32(), 4095);
|
|
|
|
switch (bit_size) {
|
|
|
|
case 8:
|
|
|
|
code->movzx(result, code->byte[rax + page_offset]);
|
|
|
|
break;
|
|
|
|
case 16:
|
|
|
|
code->movzx(result, word[rax + page_offset]);
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
code->mov(result.cvt32(), dword[rax + page_offset]);
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
code->mov(result.cvt64(), qword[rax + page_offset]);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ASSERT_MSG(false, "Invalid bit_size");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
code->jmp(end);
|
|
|
|
code->L(abort);
|
|
|
|
code->call(code->GetMemoryReadCallback(bit_size));
|
|
|
|
code->L(end);
|
2016-07-11 23:43:53 +02:00
|
|
|
}
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
template<typename FunctionPointer>
|
|
|
|
static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
|
|
|
if (!cb.page_table) {
|
|
|
|
reg_alloc.HostCall(inst, inst->GetArg(0), inst->GetArg(1));
|
|
|
|
code->CallFunction(fn);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
using namespace Xbyak::util;
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
reg_alloc.ScratchGpr({ HostLoc::RAX });
|
|
|
|
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
|
|
|
|
Xbyak::Reg64 value = reg_alloc.UseScratchGpr(inst->GetArg(1), { ABI_PARAM2 });
|
|
|
|
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
|
|
|
|
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
|
|
|
|
|
|
|
|
Xbyak::Label abort, end;
|
|
|
|
|
|
|
|
code->mov(rax, u64(cb.page_table));
|
|
|
|
code->mov(page_index.cvt32(), vaddr);
|
|
|
|
code->shr(page_index.cvt32(), 12);
|
|
|
|
code->mov(rax, qword[rax + page_index * 8]);
|
|
|
|
code->test(rax, rax);
|
|
|
|
code->jz(abort);
|
|
|
|
code->mov(page_offset.cvt32(), vaddr);
|
|
|
|
code->and_(page_offset.cvt32(), 4095);
|
|
|
|
switch (bit_size) {
|
|
|
|
case 8:
|
|
|
|
code->mov(code->byte[rax + page_offset], value.cvt8());
|
|
|
|
break;
|
|
|
|
case 16:
|
|
|
|
code->mov(word[rax + page_offset], value.cvt16());
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
code->mov(dword[rax + page_offset], value.cvt32());
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
code->mov(qword[rax + page_offset], value.cvt64());
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ASSERT_MSG(false, "Invalid bit_size");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
code->jmp(end);
|
|
|
|
code->L(abort);
|
|
|
|
code->call(code->GetMemoryWriteCallback(bit_size));
|
|
|
|
code->L(end);
|
2016-07-11 23:43:53 +02:00
|
|
|
}
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
// Emits an 8-bit guest memory read; delegates to ReadMemory with the
// user's MemoryRead8 callback.
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 8;
    ReadMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryRead8);
}
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
// Emits a 16-bit guest memory read; delegates to ReadMemory with the
// user's MemoryRead16 callback.
void EmitX64::EmitReadMemory16(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 16;
    ReadMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryRead16);
}
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
// Emits a 32-bit guest memory read; delegates to ReadMemory with the
// user's MemoryRead32 callback.
void EmitX64::EmitReadMemory32(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 32;
    ReadMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryRead32);
}
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
// Emits a 64-bit guest memory read; delegates to ReadMemory with the
// user's MemoryRead64 callback.
void EmitX64::EmitReadMemory64(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 64;
    ReadMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryRead64);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits an 8-bit guest memory write; delegates to WriteMemory with the
// user's MemoryWrite8 callback.
void EmitX64::EmitWriteMemory8(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 8;
    WriteMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryWrite8);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 16-bit guest memory write; delegates to WriteMemory with the
// user's MemoryWrite16 callback.
void EmitX64::EmitWriteMemory16(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 16;
    WriteMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryWrite16);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 32-bit guest memory write; delegates to WriteMemory with the
// user's MemoryWrite32 callback.
void EmitX64::EmitWriteMemory32(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 32;
    WriteMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryWrite32);
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
// Emits a 64-bit guest memory write; delegates to WriteMemory with the
// user's MemoryWrite64 callback.
void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 64;
    WriteMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryWrite64);
}
|
|
|
|
|
2016-08-31 22:09:26 +02:00
|
|
|
template <typename FunctionPointer>
|
|
|
|
static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, FunctionPointer fn) {
|
2016-08-24 21:07:08 +02:00
|
|
|
using namespace Xbyak::util;
|
|
|
|
Xbyak::Label end;
|
|
|
|
|
TranslateArm: Implement CLREX, LDREX, LDREXB, LDREXD, LDREXH, STREX, STREXB, STREXD, STREXH, SWP, SWPB
2016-08-09 23:48:20 +02:00
|
|
|
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg32 passed = reg_alloc.DefGpr(inst).cvt32();
|
|
|
|
Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers.
|
|
|
|
|
|
|
|
code->mov(passed, u32(1));
|
|
|
|
code->cmp(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
|
|
|
|
code->je(end);
|
|
|
|
code->mov(tmp, code->ABI_PARAM1);
|
|
|
|
code->xor_(tmp, dword[r15 + offsetof(JitState, exclusive_address)]);
|
|
|
|
code->test(tmp, JitState::RESERVATION_GRANULE_MASK);
|
|
|
|
code->jne(end);
|
|
|
|
code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
|
|
|
|
code->CallFunction(fn);
|
|
|
|
code->xor_(passed, passed);
|
|
|
|
code->L(end);
|
TranslateArm: Implement CLREX, LDREX, LDREXB, LDREXD, LDREXH, STREX, STREXB, STREXD, STREXH, SWP, SWPB
2016-08-09 23:48:20 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Emits an 8-bit exclusive store; delegates to ExclusiveWrite with the
// user's MemoryWrite8 callback.
void EmitX64::EmitExclusiveWriteMemory8(IR::Block&, IR::Inst* inst) {
    const auto store_fn = cb.MemoryWrite8;
    ExclusiveWrite(code, reg_alloc, inst, store_fn);
}
|
|
|
|
|
|
|
|
// Emits a 16-bit exclusive store; delegates to ExclusiveWrite with the
// user's MemoryWrite16 callback.
void EmitX64::EmitExclusiveWriteMemory16(IR::Block&, IR::Inst* inst) {
    const auto store_fn = cb.MemoryWrite16;
    ExclusiveWrite(code, reg_alloc, inst, store_fn);
}
|
|
|
|
|
|
|
|
// Emits a 32-bit exclusive store; delegates to ExclusiveWrite with the
// user's MemoryWrite32 callback.
void EmitX64::EmitExclusiveWriteMemory32(IR::Block&, IR::Inst* inst) {
    const auto store_fn = cb.MemoryWrite32;
    ExclusiveWrite(code, reg_alloc, inst, store_fn);
}
|
|
|
|
|
|
|
|
// Emits a 64-bit exclusive store whose value arrives as two 32-bit halves:
// argument 1 is the low half and argument 2 the high half.
//
// The result register is set to 1 up front and cleared to 0 only after
// cb.MemoryWrite64 has been called. The call is skipped when
// JitState::exclusive_state is zero, or when the address differs from
// JitState::exclusive_address in any bit selected by RESERVATION_GRANULE_MASK.
void EmitX64::EmitExclusiveWriteMemory64(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;
    Xbyak::Label skip_store;

    // Address and low half are placed in the first two host-call parameter registers.
    reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
    Xbyak::Reg32 status = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Reg64 high_half = reg_alloc.UseScratchGpr(inst->GetArg(2));
    Xbyak::Reg64 combined = code->ABI_PARAM2;
    Xbyak::Reg32 addr_diff = code->ABI_RETURN.cvt32(); // Use one of the unused HostCall registers.

    const Xbyak::Address monitor_state = code->byte[r15 + offsetof(JitState, exclusive_state)];
    const Xbyak::Address monitor_address = dword[r15 + offsetof(JitState, exclusive_address)];

    // Assume failure until the store has actually been carried out.
    code->mov(status, u32(1));

    // No reservation held -> fail.
    code->cmp(monitor_state, u8(0));
    code->je(skip_store);

    // Reservation held for a different granule -> fail.
    code->mov(addr_diff, code->ABI_PARAM1);
    code->xor_(addr_diff, monitor_address);
    code->test(addr_diff, JitState::RESERVATION_GRANULE_MASK);
    code->jne(skip_store);

    // Consume the reservation.
    code->mov(monitor_state, u8(0));

    // combined = (high_half << 32) | zero_extend(low half)
    code->mov(combined.cvt32(), combined.cvt32()); // zero extend to 64-bits
    code->shl(high_half, 32);
    code->or_(combined, high_half);

    code->CallFunction(cb.MemoryWrite64);
    code->xor_(status, status);

    code->L(skip_store);
}
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2016-07-04 15:37:50 +02:00
|
|
|
void EmitX64::EmitAddCycles(size_t cycles) {
|
2016-08-24 21:07:08 +02:00
|
|
|
using namespace Xbyak::util;
|
2016-07-04 15:37:50 +02:00
|
|
|
ASSERT(cycles < std::numeric_limits<u32>::max());
|
2016-08-24 21:07:08 +02:00
|
|
|
code->sub(qword[r15 + offsetof(JitState, cycles_remaining)], static_cast<u32>(cycles));
|
2016-07-07 11:53:09 +02:00
|
|
|
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits a test of the guest CPSR flags for `cond` and returns the label that the
// emitted code jumps to when the condition holds. When the condition does not
// hold, execution falls through to whatever the caller emits next. The caller is
// responsible for binding the returned label.
//
// Uses eax for the CPSR copy; the GT/LE cases additionally use ebx and esi.
static Xbyak::Label EmitCond(BlockOfCode* code, Arm::Cond cond) {
    using namespace Xbyak::util;

    Xbyak::Label pass;

    const Xbyak::Reg32 flags = eax;
    code->mov(flags, MJitStateCpsr());

    // Positions of the N, Z, C, V flags within the CPSR word.
    constexpr size_t n_shift = 31;
    constexpr size_t z_shift = 30;
    constexpr size_t c_shift = 29;
    constexpr size_t v_shift = 28;
    constexpr u32 n_mask = 1u << n_shift;
    constexpr u32 z_mask = 1u << z_shift;
    constexpr u32 c_mask = 1u << c_shift;
    constexpr u32 v_mask = 1u << v_shift;

    // Jump to `pass` when the flag selected by `mask` is set.
    const auto pass_if_set = [&](u32 mask) {
        code->test(flags, mask);
        code->jnz(pass);
    };

    // Jump to `pass` when the flag selected by `mask` is clear.
    const auto pass_if_clear = [&](u32 mask) {
        code->test(flags, mask);
        code->jz(pass);
    };

    // Leaves the host zero flag clear iff (z | (n != v)); shared by GT and LE.
    const auto test_z_or_n_ne_v = [&] {
        const Xbyak::Reg32 n_bit = ebx;
        const Xbyak::Reg32 v_bit = esi;
        code->mov(n_bit, flags);
        code->mov(v_bit, flags);
        code->shr(n_bit, n_shift);
        code->shr(v_bit, v_shift);
        code->shr(flags, z_shift);
        code->xor_(n_bit, v_bit);
        code->or_(n_bit, flags);
        code->test(n_bit, 1);
    };

    switch (cond) {
    case Arm::Cond::EQ: // z
        pass_if_set(z_mask);
        break;
    case Arm::Cond::NE: // !z
        pass_if_clear(z_mask);
        break;
    case Arm::Cond::CS: // c
        pass_if_set(c_mask);
        break;
    case Arm::Cond::CC: // !c
        pass_if_clear(c_mask);
        break;
    case Arm::Cond::MI: // n
        pass_if_set(n_mask);
        break;
    case Arm::Cond::PL: // !n
        pass_if_clear(n_mask);
        break;
    case Arm::Cond::VS: // v
        pass_if_set(v_mask);
        break;
    case Arm::Cond::VC: // !v
        pass_if_clear(v_mask);
        break;
    case Arm::Cond::HI: // c & !z
        // Of {z, c}, exactly c must be set.
        code->and_(flags, z_mask | c_mask);
        code->cmp(flags, c_mask);
        code->je(pass);
        break;
    case Arm::Cond::LS: // !c | z
        code->and_(flags, z_mask | c_mask);
        code->cmp(flags, c_mask);
        code->jne(pass);
        break;
    case Arm::Cond::GE: // n == v
        // Passes when both flags are clear or both are set.
        code->and_(flags, n_mask | v_mask);
        code->jz(pass);
        code->cmp(flags, n_mask | v_mask);
        code->je(pass);
        break;
    case Arm::Cond::LT: { // n != v
        Xbyak::Label both_clear;
        code->and_(flags, n_mask | v_mask);
        code->jz(both_clear);
        code->cmp(flags, n_mask | v_mask);
        code->jne(pass);
        code->L(both_clear);
        break;
    }
    case Arm::Cond::GT: // !z & (n == v)
        test_z_or_n_ne_v();
        code->jz(pass);
        break;
    case Arm::Cond::LE: // z | (n != v)
        test_z_or_n_ne_v();
        code->jnz(pass);
        break;
    default:
        ASSERT_MSG(false, "Unknown cond %zu", static_cast<size_t>(cond));
        break;
    }

    return pass;
}
|
|
|
|
|
2016-08-18 19:16:18 +02:00
|
|
|
// Emits the condition check that guards a conditional block.
//
// Nothing is emitted for an always-executed (AL) block. Otherwise the emitted
// code evaluates the block's condition; on failure it charges
// ConditionFailedCycleCount() cycles and links to ConditionFailedLocation(),
// and on success it continues with the code emitted after this prelude.
void EmitX64::EmitCondPrelude(const IR::Block& block) {
    const Arm::Cond cond = block.GetCondition();

    if (cond == Arm::Cond::AL) {
        ASSERT(!block.HasConditionFailedLocation());
        return;
    }

    ASSERT(block.HasConditionFailedLocation());

    Xbyak::Label cond_passed = EmitCond(code, cond);

    // Fall-through path: the condition failed.
    EmitAddCycles(block.ConditionFailedCycleCount());
    EmitTerminalLinkBlock(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());

    code->L(cond_passed);
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Dispatches on the alternative currently held by the `terminal` variant and
// forwards to the matching EmitTerminal* routine. Index 0 (and anything else
// not listed) is rejected with an assertion.
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
    const int alternative = terminal.which();

    switch (alternative) {
    case 1:
        EmitTerminalInterpret(boost::get<IR::Term::Interpret>(terminal), initial_location);
        break;
    case 2:
        EmitTerminalReturnToDispatch(boost::get<IR::Term::ReturnToDispatch>(terminal), initial_location);
        break;
    case 3:
        EmitTerminalLinkBlock(boost::get<IR::Term::LinkBlock>(terminal), initial_location);
        break;
    case 4:
        EmitTerminalLinkBlockFast(boost::get<IR::Term::LinkBlockFast>(terminal), initial_location);
        break;
    case 5:
        EmitTerminalPopRSBHint(boost::get<IR::Term::PopRSBHint>(terminal), initial_location);
        break;
    case 6:
        EmitTerminalIf(boost::get<IR::Term::If>(terminal), initial_location);
        break;
    case 7:
        EmitTerminalCheckHalt(boost::get<IR::Term::CheckHalt>(terminal), initial_location);
        break;
    default:
        ASSERT_MSG(false, "Invalid Terminal. Bad programmer.");
        break;
    }
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Emits a hand-off to the user's interpreter fallback for the instruction at
// terminal.next, followed by a return from the run loop.
//
// The fallback receives (next PC, jit_interface, cb.user_arg). The guest PC in
// JitState is updated to the next PC before the call. Changing the T or E flag
// across this terminal is not supported and asserts.
void EmitX64::EmitTerminalInterpret(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
    ASSERT_MSG(terminal.next.TFlag() == initial_location.TFlag(), "Unimplemented");
    ASSERT_MSG(terminal.next.EFlag() == initial_location.EFlag(), "Unimplemented");

    const Xbyak::Reg32 pc_arg = code->ABI_PARAM1.cvt32();

    // Set up the three callback arguments.
    code->mov(pc_arg, terminal.next.PC());
    code->mov(code->ABI_PARAM2, reinterpret_cast<u64>(jit_interface));
    code->mov(code->ABI_PARAM3, reinterpret_cast<u64>(cb.user_arg));

    // Publish the new PC to the guest state before leaving JIT code.
    code->mov(MJitStateReg(Arm::Reg::PC), pc_arg);

    code->SwitchMxcsrOnExit();
    code->CallFunction(cb.InterpreterFallback);
    code->ReturnFromRunCode(false); // TODO: Check cycles
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Emits an unconditional return from the run loop back to the dispatcher.
void EmitX64::EmitTerminalReturnToDispatch(IR::Term::ReturnToDispatch, IR::LocationDescriptor initial_location) {
    (void)initial_location; // not needed for this terminal
    code->ReturnFromRunCode();
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Emits a link to the block at terminal.next that is only taken while
// cycles_remaining is greater than zero.
//
// CPSR bit 5 (T) and bit 9 (E) are first updated if they differ between the
// current and the next location. The conditional jump site is recorded in
// patch_jg_locations so Patch() can (re)write it later; the jump is emitted
// immediately when the target block already exists. If the jump is not taken
// (or not yet present), the guest PC is stored and control returns from the
// run loop.
void EmitX64::EmitTerminalLinkBlock(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    const auto next_t = terminal.next.TFlag();
    if (next_t != initial_location.TFlag()) {
        constexpr u32 t_bit = 1u << 5;
        if (next_t) {
            code->or_(MJitStateCpsr(), t_bit);
        } else {
            code->and_(MJitStateCpsr(), ~t_bit);
        }
    }

    const auto next_e = terminal.next.EFlag();
    if (next_e != initial_location.EFlag()) {
        constexpr u32 e_bit = 1u << 9;
        if (next_e) {
            code->or_(MJitStateCpsr(), e_bit);
        } else {
            code->and_(MJitStateCpsr(), ~e_bit);
        }
    }

    code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0);

    // Reserve a fixed-size slot for the `jg` so it can be patched in place.
    CodePtr jg_site = code->getCurr();
    patch_jg_locations[terminal.next].emplace_back(jg_site);
    if (auto target_block = GetBasicBlock(terminal.next)) {
        code->jg(target_block->code_ptr);
    }
    code->EnsurePatchLocationSize(jg_site, 6);

    code->mov(MJitStateReg(Arm::Reg::PC), terminal.next.PC());
    code->ReturnFromRunCode(); // TODO: Check cycles, Properly do a link
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Emits an unconditional link to the block at terminal.next (no cycle check).
//
// CPSR bit 5 (T) and bit 9 (E) are first updated if they differ between the
// current and the next location. The jump site is recorded in
// patch_jmp_locations so Patch() can (re)write it later. When the target block
// already exists a direct jump is emitted; otherwise the guest PC is stored and
// control jumps to the return-from-run-code address.
void EmitX64::EmitTerminalLinkBlockFast(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    const auto next_t = terminal.next.TFlag();
    if (next_t != initial_location.TFlag()) {
        constexpr u32 t_bit = 1u << 5;
        if (next_t) {
            code->or_(MJitStateCpsr(), t_bit);
        } else {
            code->and_(MJitStateCpsr(), ~t_bit);
        }
    }

    const auto next_e = terminal.next.EFlag();
    if (next_e != initial_location.EFlag()) {
        constexpr u32 e_bit = 1u << 9;
        if (next_e) {
            code->or_(MJitStateCpsr(), e_bit);
        } else {
            code->and_(MJitStateCpsr(), ~e_bit);
        }
    }

    CodePtr jmp_site = code->getCurr();
    patch_jmp_locations[terminal.next].emplace_back(jmp_site);

    auto target_block = GetBasicBlock(terminal.next);
    if (target_block) {
        code->jmp(target_block->code_ptr);
        code->EnsurePatchLocationSize(jmp_site, 5);
    } else {
        code->mov(MJitStateReg(Arm::Reg::PC), terminal.next.PC());
        code->jmp(code->GetReturnFromRunCodeAddress());
        // NOTE(review): presumably padding for the patch site - confirm.
        code->nop(3);
    }
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Emits a lookup of the current guest location in the JitState RSB arrays and
// an indirect jump to the matching code pointer. If no entry matches, the jump
// goes to the return-from-run-code address.
//
// Uses rax, rbx and rcx directly.
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    (void)initial_location; // not needed for this terminal

    const Xbyak::Reg32 mode_bits = ebx;
    const Xbyak::Reg64 descriptor = rbx;
    const Xbyak::Reg32 guest_pc = ecx;
    const Xbyak::Reg64 jump_target = rax;

    // This calculation has to match up with IREmitter::PushRSB
    // descriptor = ((((cpsr & (T | E)) >> 2) | FPSCR_mode) << 32) | pc
    code->mov(mode_bits, MJitStateCpsr());
    code->mov(guest_pc, MJitStateReg(Arm::Reg::PC));
    code->and_(mode_bits, u32((1 << 5) | (1 << 9)));
    code->shr(mode_bits, 2);
    code->or_(mode_bits, dword[r15 + offsetof(JitState, FPSCR_mode)]);
    code->shl(descriptor, 32);
    code->or_(descriptor, rcx);

    // Default target when nothing in the RSB matches.
    code->mov(jump_target, u64(code->GetReturnFromRunCodeAddress()));

    // Branch-free scan: each matching slot overrides the target via cmove.
    for (size_t slot = 0; slot < JitState::RSBSize; ++slot) {
        const size_t slot_offset = slot * sizeof(u64);
        code->cmp(descriptor, qword[r15 + offsetof(JitState, rsb_location_descriptors) + slot_offset]);
        code->cmove(jump_target, qword[r15 + offsetof(JitState, rsb_codeptrs) + slot_offset]);
    }

    code->jmp(jump_target);
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Emits a two-way terminal: the code for terminal.else_ is laid out first and
// runs when the condition terminal.if_ fails; the code for terminal.then_
// follows at the label that the condition test jumps to on success.
void EmitX64::EmitTerminalIf(IR::Term::If terminal, IR::LocationDescriptor initial_location) {
    Xbyak::Label then_branch = EmitCond(code, terminal.if_);

    // Fall-through: condition not satisfied.
    EmitTerminal(terminal.else_, initial_location);

    // Jump target: condition satisfied.
    code->L(then_branch);
    EmitTerminal(terminal.then_, initial_location);
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Emits a check of JitState::halt_requested: a non-zero value jumps to the
// return-from-run-code address, otherwise execution continues with the code
// emitted for terminal.else_.
void EmitX64::EmitTerminalCheckHalt(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    const Xbyak::Address halt_flag = code->byte[r15 + offsetof(JitState, halt_requested)];

    code->cmp(halt_flag, u8(0));
    code->jne(code->GetReturnFromRunCodeAddress());

    EmitTerminal(terminal.else_, initial_location);
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Rewrites every patch site recorded for `desc` so that it targets `bb`.
//
// Three kinds of site are handled: conditional `jg` links (6 bytes),
// unconditional `jmp` links (5 bytes), and `mov rcx, imm64` loads keyed by the
// descriptor's unique hash (10 bytes). The emitter's write position is restored
// before returning.
void EmitX64::Patch(IR::LocationDescriptor desc, CodePtr bb) {
    using namespace Xbyak::util;

    const CodePtr resume_at = code->getCurr();

    for (CodePtr site : patch_jg_locations[desc]) {
        code->SetCodePtr(site);
        code->jg(bb);
        code->EnsurePatchLocationSize(site, 6);
    }

    for (CodePtr site : patch_jmp_locations[desc]) {
        code->SetCodePtr(site);
        code->jmp(bb);
        code->EnsurePatchLocationSize(site, 5);
    }

    for (CodePtr site : patch_unique_hash_locations[desc.UniqueHash()]) {
        code->SetCodePtr(site);
        code->mov(rcx, u64(bb));
        code->EnsurePatchLocationSize(site, 10);
    }

    // Continue emitting where we left off.
    code->SetCodePtr(resume_at);
}
|
|
|
|
|
2016-07-07 13:01:47 +02:00
|
|
|
void EmitX64::ClearCache() {
|
2016-08-18 19:18:44 +02:00
|
|
|
unique_hash_to_code_ptr.clear();
|
|
|
|
patch_unique_hash_locations.clear();
|
2016-07-07 13:01:47 +02:00
|
|
|
basic_blocks.clear();
|
2016-08-08 16:56:07 +02:00
|
|
|
patch_jg_locations.clear();
|
2016-08-15 15:33:17 +02:00
|
|
|
patch_jmp_locations.clear();
|
2016-07-07 13:01:47 +02:00
|
|
|
}
|
|
|
|
|
2016-07-01 15:01:06 +02:00
|
|
|
} // namespace BackendX64
|
|
|
|
} // namespace Dynarmic
|