2016-07-01 15:01:06 +02:00
|
|
|
/* This file is part of the dynarmic project.
|
|
|
|
* Copyright (c) 2016 MerryMage
|
|
|
|
* This software may be used and distributed according to the terms of the GNU
|
|
|
|
* General Public License version 2 or any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
2016-12-31 12:17:47 +01:00
|
|
|
#include <dynarmic/coprocessor.h>
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
#include "backend_x64/abi.h"
|
2016-12-05 05:14:58 +01:00
|
|
|
#include "backend_x64/block_of_code.h"
|
2016-07-01 15:01:06 +02:00
|
|
|
#include "backend_x64/emit_x64.h"
|
2016-08-24 21:07:08 +02:00
|
|
|
#include "backend_x64/jitstate.h"
|
2017-02-16 19:18:29 +01:00
|
|
|
#include "common/address_range.h"
|
2016-12-05 05:11:34 +01:00
|
|
|
#include "common/assert.h"
|
|
|
|
#include "common/bit_util.h"
|
2017-02-16 19:18:29 +01:00
|
|
|
#include "common/common_types.h"
|
2017-02-16 20:40:51 +01:00
|
|
|
#include "common/variant_util.h"
|
2016-09-06 01:52:33 +02:00
|
|
|
#include "frontend/arm/types.h"
|
2016-09-03 22:48:03 +02:00
|
|
|
#include "frontend/ir/basic_block.h"
|
2016-09-05 12:54:09 +02:00
|
|
|
#include "frontend/ir/location_descriptor.h"
|
2016-09-03 22:48:03 +02:00
|
|
|
#include "frontend/ir/microinstruction.h"
|
2016-12-05 05:11:34 +01:00
|
|
|
#include "frontend/ir/opcodes.h"
|
2016-07-01 15:01:06 +02:00
|
|
|
|
|
|
|
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
|
|
|
|
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
|
|
|
|
|
|
|
|
namespace Dynarmic {
|
|
|
|
namespace BackendX64 {
|
|
|
|
|
2017-03-18 18:20:21 +01:00
|
|
|
// Raw bit patterns of notable floating-point values.
// The single-precision patterns occupy the low 32 bits of the u64.

// IEEE-754 single precision.
constexpr u64 f32_negative_zero = 0x80000000u; // sign bit only
constexpr u64 f32_nan           = 0x7fc00000u; // exponent all ones, top mantissa bit set
constexpr u64 f32_non_sign_mask = 0x7fffffffu; // every bit except the sign bit

// IEEE-754 double precision.
constexpr u64 f64_negative_zero = 0x8000000000000000u; // sign bit only
constexpr u64 f64_nan           = 0x7ff8000000000000u; // exponent all ones, top mantissa bit set
constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; // every bit except the sign bit

// Double-precision values used as bounds.
constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu; // one below the largest denormal pattern
constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
/// Builds the 32-bit memory operand for guest register `reg` inside the
/// JitState addressed through r15.
static Xbyak::Address MJitStateReg(Arm::Reg reg) {
    using namespace Xbyak::util;
    // JitState::Reg is indexed by the numeric value of the enum, one u32 per register.
    const size_t byte_offset = offsetof(JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg);
    return dword[r15 + byte_offset];
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
/// Builds the memory operand for extension register `reg` inside the JitState
/// addressed through r15: a dword for single registers (indexed from S0),
/// a qword for double registers (indexed from D0). Both views share JitState::ExtReg.
/// Asserts if `reg` is neither a single nor a double register.
static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) {
    using namespace Xbyak::util;
    const size_t raw = static_cast<size_t>(reg);

    if (Arm::IsSingleExtReg(reg)) {
        const size_t single_index = raw - static_cast<size_t>(Arm::ExtReg::S0);
        return dword[r15 + offsetof(JitState, ExtReg) + sizeof(u32) * single_index];
    }

    if (Arm::IsDoubleExtReg(reg)) {
        const size_t double_index = raw - static_cast<size_t>(Arm::ExtReg::D0);
        return qword[r15 + offsetof(JitState, ExtReg) + sizeof(u64) * double_index];
    }

    ASSERT_MSG(false, "Should never happen.");
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
/// Builds the 32-bit memory operand for the guest CPSR inside the JitState
/// addressed through r15.
static Xbyak::Address MJitStateCpsr() {
    using namespace Xbyak::util;
    const size_t cpsr_offset = offsetof(JitState, Cpsr);
    return dword[r15 + cpsr_offset];
}
|
|
|
|
|
2016-07-23 00:55:00 +02:00
|
|
|
/// Removes `inst` from `block`'s instruction list and then invalidates it.
static void EraseInstruction(IR::Block& block, IR::Inst* inst) {
    auto& instructions = block.Instructions();
    instructions.erase(inst);
    inst->Invalidate();
}
|
|
|
|
|
2016-12-05 05:22:56 +01:00
|
|
|
/// Stores the code buffer, the user callbacks and the owning Jit interface.
/// No code is emitted here.
EmitX64::EmitX64(BlockOfCode* code, UserCallbacks cb, Jit* jit_interface)
        : code(code)
        , cb(cb)
        , jit_interface(jit_interface) {}
|
|
|
|
|
2016-08-26 20:14:25 +02:00
|
|
|
/// Emits host code for `block` and registers the result.
///
/// Sequence: align the code buffer, emit the condition prelude, emit every IR
/// instruction through its Emit<Opcode> member, then the cycle accounting and
/// the terminal, followed by an int3. The entry point is passed to Patch() and
/// a BlockDescriptor is recorded in block_descriptors under the location's
/// unique hash.
///
/// @param block IR block to compile; emitters may erase instructions from it.
/// @return Descriptor of the emitted block (entry point, size, start location, end PC).
EmitX64::BlockDescriptor EmitX64::Emit(IR::Block& block) {
    code->align();
    const u8* const entrypoint = code->getCurr();

    // Start emitting.
    EmitCondPrelude(block);

    RegAlloc reg_alloc{code};

    // Explicit iterator loop: emitters may erase other instructions from the block while we walk it.
    for (auto it = block.begin(); it != block.end(); ++it) {
        IR::Inst* const inst = &*it;

        // One case per opcode is generated from opcodes.inc; each forwards to Emit<name>.
        switch (inst->GetOpcode()) {

#define OPCODE(name, type, ...)                          \
        case IR::Opcode::name:                           \
            EmitX64::Emit##name(reg_alloc, block, inst); \
            break;
#include "frontend/ir/opcodes.inc"
#undef OPCODE

        default:
            ASSERT_MSG(false, "Invalid opcode %zu", static_cast<size_t>(inst->GetOpcode()));
            break;
        }

        reg_alloc.EndOfAllocScope();
    }

    reg_alloc.AssertNoMoreUses();

    EmitAddCycles(block.CycleCount());
    EmitTerminal(block.GetTerminal(), block.Location());
    code->int3();

    const IR::LocationDescriptor descriptor = block.Location();
    Patch(descriptor, entrypoint);

    // Size covers everything emitted since the aligned entry point.
    const size_t size = static_cast<size_t>(code->getCurr() - entrypoint);
    EmitX64::BlockDescriptor block_desc{entrypoint, size, block.Location(), block.EndLocation().PC()};
    block_descriptors.emplace(descriptor.UniqueHash(), block_desc);

    return block_desc;
}
|
|
|
|
|
2016-12-05 05:27:05 +01:00
|
|
|
/// Looks up the descriptor recorded for `descriptor`'s unique hash.
/// @return The stored BlockDescriptor, or boost::none when no block is recorded for it.
boost::optional<EmitX64::BlockDescriptor> EmitX64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
    const auto found = block_descriptors.find(descriptor.UniqueHash());
    if (found != block_descriptors.end()) {
        return boost::make_optional<BlockDescriptor>(found->second);
    }
    return boost::none;
}
|
|
|
|
|
2017-02-18 22:46:36 +01:00
|
|
|
/// Void opcode: emits no host code.
void EmitX64::EmitVoid(RegAlloc&, IR::Block&, IR::Inst*) {}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Breakpoint opcode: emits a single host int3 instruction.
void EmitX64::EmitBreakpoint(RegAlloc&, IR::Block&, IR::Inst*) {
    code->int3();
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Identity opcode: the result is defined as its argument, with no code emitted.
/// Immediate arguments are skipped; nothing is defined for them here.
void EmitX64::EmitIdentity(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    if (args[0].IsImmediate()) {
        return;
    }
    reg_alloc.DefineValue(inst, args[0]);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Loads the guest register named by argument 0 from the JitState into a
/// scratch GPR and defines it as the instruction's value.
void EmitX64::EmitGetRegister(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Arm::Reg guest_reg = inst->GetArg(0).GetRegRef();

    const Xbyak::Reg32 loaded = reg_alloc.ScratchGpr().cvt32();
    code->mov(loaded, MJitStateReg(guest_reg));
    reg_alloc.DefineValue(inst, loaded);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Loads a single-precision extension register (argument 0) from the JitState
/// into a scratch XMM register via movss and defines it as the result.
void EmitX64::EmitGetExtendedRegister32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::IsSingleExtReg(ext_reg));

    const Xbyak::Xmm loaded = reg_alloc.ScratchXmm();
    code->movss(loaded, MJitStateExtReg(ext_reg));
    reg_alloc.DefineValue(inst, loaded);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Loads a double-precision extension register (argument 0) from the JitState
/// into a scratch XMM register via movsd and defines it as the result.
void EmitX64::EmitGetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::IsDoubleExtReg(ext_reg));

    const Xbyak::Xmm loaded = reg_alloc.ScratchXmm();
    code->movsd(loaded, MJitStateExtReg(ext_reg));
    reg_alloc.DefineValue(inst, loaded);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Stores argument 1 into the guest register named by argument 0.
/// An immediate is written directly to memory; otherwise the value is stored from a GPR.
void EmitX64::EmitSetRegister(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Arm::Reg guest_reg = inst->GetArg(0).GetRegRef();

    if (!args[1].IsImmediate()) {
        const Xbyak::Reg32 source = reg_alloc.UseGpr(args[1]).cvt32();
        code->mov(MJitStateReg(guest_reg), source);
        return;
    }

    code->mov(MJitStateReg(guest_reg), args[1].GetImmediateU32());
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Stores argument 1 (in an XMM register) into the single-precision extension
/// register named by argument 0, using movss.
void EmitX64::EmitSetExtendedRegister32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::IsSingleExtReg(ext_reg));

    const Xbyak::Xmm value = reg_alloc.UseXmm(args[1]);
    code->movss(MJitStateExtReg(ext_reg), value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Stores argument 1 (in an XMM register) into the double-precision extension
/// register named by argument 0, using movsd.
void EmitX64::EmitSetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::IsDoubleExtReg(ext_reg));

    const Xbyak::Xmm value = reg_alloc.UseXmm(args[1]);
    code->movsd(MJitStateExtReg(ext_reg), value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Loads the whole guest CPSR into a scratch GPR and defines it as the result.
void EmitX64::EmitGetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 cpsr = reg_alloc.ScratchGpr().cvt32();
    code->mov(cpsr, MJitStateCpsr());
    reg_alloc.DefineValue(inst, cpsr);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Overwrites the whole guest CPSR with argument 0.
void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg32 new_cpsr = reg_alloc.UseGpr(args[0]).cvt32();
    code->mov(MJitStateCpsr(), new_cpsr);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Extracts CPSR bit 31 (N) as a 0/1 value.
void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 flag = reg_alloc.ScratchGpr().cvt32();
    code->mov(flag, MJitStateCpsr());
    // N is the top bit, so the 32-bit shift alone isolates it; no mask is needed.
    code->shr(flag, 31);
    reg_alloc.DefineValue(inst, flag);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Writes argument 0 into CPSR bit 31 (N).
/// An immediate becomes a single or/and on memory; a register value is shifted
/// into position, the bit is cleared in memory and the shifted value is or'ed in.
void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 31;
    constexpr u32 flag_mask = 1u << flag_bit;
    auto args = reg_alloc.GetArgumentInfo(inst);

    if (!args[0].IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(args[0]).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (args[0].GetImmediateU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    } else {
        code->and_(MJitStateCpsr(), ~flag_mask);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Extracts CPSR bit 30 (Z) as a 0/1 value.
void EmitX64::EmitGetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 flag = reg_alloc.ScratchGpr().cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, 30);
    code->and_(flag, 1); // drop the N bit that was shifted down alongside Z
    reg_alloc.DefineValue(inst, flag);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Writes argument 0 into CPSR bit 30 (Z).
/// An immediate becomes a single or/and on memory; a register value is shifted
/// into position, the bit is cleared in memory and the shifted value is or'ed in.
void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 30;
    constexpr u32 flag_mask = 1u << flag_bit;
    auto args = reg_alloc.GetArgumentInfo(inst);

    if (!args[0].IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(args[0]).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (args[0].GetImmediateU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    } else {
        code->and_(MJitStateCpsr(), ~flag_mask);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Extracts CPSR bit 29 (C) as a 0/1 value.
void EmitX64::EmitGetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 flag = reg_alloc.ScratchGpr().cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, 29);
    code->and_(flag, 1); // drop the higher flags that were shifted down alongside C
    reg_alloc.DefineValue(inst, flag);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Writes argument 0 into CPSR bit 29 (C).
/// An immediate becomes a single or/and on memory; a register value is shifted
/// into position, the bit is cleared in memory and the shifted value is or'ed in.
void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 29;
    constexpr u32 flag_mask = 1u << flag_bit;
    auto args = reg_alloc.GetArgumentInfo(inst);

    if (!args[0].IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(args[0]).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (args[0].GetImmediateU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    } else {
        code->and_(MJitStateCpsr(), ~flag_mask);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Extracts CPSR bit 28 (V) as a 0/1 value.
void EmitX64::EmitGetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 flag = reg_alloc.ScratchGpr().cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, 28);
    code->and_(flag, 1); // drop the higher flags that were shifted down alongside V
    reg_alloc.DefineValue(inst, flag);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Writes argument 0 into CPSR bit 28 (V).
/// An immediate becomes a single or/and on memory; a register value is shifted
/// into position, the bit is cleared in memory and the shifted value is or'ed in.
void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 28;
    constexpr u32 flag_mask = 1u << flag_bit;
    auto args = reg_alloc.GetArgumentInfo(inst);

    if (!args[0].IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(args[0]).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (args[0].GetImmediateU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    } else {
        code->and_(MJitStateCpsr(), ~flag_mask);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Or's argument 0 into CPSR bit 27 (Q). The bit is never cleared here:
/// a false immediate emits nothing, and a register value is only or'ed in.
void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 27;
    constexpr u32 flag_mask = 1u << flag_bit;
    auto args = reg_alloc.GetArgumentInfo(inst);

    if (!args[0].IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(args[0]).cvt32();

        code->shl(shifted, flag_bit);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (args[0].GetImmediateU1()) {
        code->or_(MJitStateCpsr(), flag_mask);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Extracts the four GE bits (CPSR bits 19:16) into the low nibble of the result.
void EmitX64::EmitGetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 ge = reg_alloc.ScratchGpr().cvt32();
    code->mov(ge, MJitStateCpsr());
    code->shr(ge, 16);
    code->and_(ge, 0xF);
    reg_alloc.DefineValue(inst, ge);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Replaces the four GE bits (CPSR bits 19:16) with the low nibble of argument 0.
/// In both paths the field is cleared in memory first and the new, masked value or'ed in.
void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t flag_bit = 16;
    constexpr u32 flag_mask = 0xFu << flag_bit;
    auto args = reg_alloc.GetArgumentInfo(inst);

    if (!args[0].IsImmediate()) {
        const Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(args[0]).cvt32();

        code->shl(shifted, flag_bit);
        code->and_(shifted, flag_mask); // discard anything outside the GE field
        code->and_(MJitStateCpsr(), ~flag_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    // Immediate: position and mask the value at compile time.
    const u32 imm = (args[0].GetImmediateU32() << flag_bit) & flag_mask;
    code->and_(MJitStateCpsr(), ~flag_mask);
    code->or_(MJitStateCpsr(), imm);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Emits an interworking branch: writes argument 0 to the guest PC and sets
/// CPSR.T (bit 5) from bit 0 of the target address.
///
/// For an immediate target the alignment and the T bit are resolved at compile
/// time. For a register target the same selection is emitted branch-free.
///
/// The register path works exclusively on 32-bit registers: the CPSR and PC
/// slots in JitState are dword operands, so 64-bit loads/stores would touch
/// the four bytes adjacent to each slot.
void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& arg = args[0];

    const u32 T_bit = 1 << 5;

    // Pseudocode:
    // if (new_pc & 1) {
    //    new_pc &= 0xFFFFFFFE;
    //    cpsr.T = true;
    // } else {
    //    new_pc &= 0xFFFFFFFC;
    //    cpsr.T = false;
    // }

    if (arg.IsImmediate()) {
        u32 new_pc = arg.GetImmediateU32();
        if (Common::Bit<0>(new_pc)) {
            new_pc &= 0xFFFFFFFE;
            code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
            code->or_(MJitStateCpsr(), T_bit);
        } else {
            new_pc &= 0xFFFFFFFC;
            code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
            code->and_(MJitStateCpsr(), ~T_bit);
        }
    } else {
        using Xbyak::util::ptr;

        Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
        Xbyak::Reg32 tmp1 = reg_alloc.ScratchGpr().cvt32();
        Xbyak::Reg32 tmp2 = reg_alloc.ScratchGpr().cvt32();

        code->mov(tmp1, MJitStateCpsr());
        code->mov(tmp2, tmp1);
        code->and_(tmp2, u32(~T_bit));         // CPSR.T = 0
        code->or_(tmp1, u32(T_bit));           // CPSR.T = 1
        code->test(new_pc, u32(1));
        code->cmove(tmp1, tmp2);               // CPSR.T = pc & 1
        code->mov(MJitStateCpsr(), tmp1);
        // tmp2 = 2 * pc, so bit 1 of tmp2 mirrors bit 0 of pc. Only the low 32 bits
        // of the sum are kept, so any stale upper half of the 64-bit register is harmless.
        code->lea(tmp2, ptr[new_pc.cvt64() + new_pc.cvt64() * 1]);
        code->or_(tmp2, u32(0xFFFFFFFC));      // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
        code->and_(new_pc, tmp2);
        code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Emits a host call to the user's CallSVC callback with argument 0 as its parameter.
/// The call is bracketed by SwitchMxcsrOnExit / SwitchMxcsrOnEntry.
void EmitX64::EmitCallSupervisor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    // No result is defined for this instruction, hence the nullptr.
    reg_alloc.HostCall(nullptr, args[0]);

    code->SwitchMxcsrOnExit();
    code->CallFunction(cb.CallSVC);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
2016-08-26 23:47:54 +02:00
|
|
|
/// Host-side helper called from emitted code: returns jit_state->Fpscr().
static u32 GetFpscrImpl(JitState* jit_state) {
    const u32 fpscr = jit_state->Fpscr();
    return fpscr;
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Emits a host call to GetFpscrImpl, passing the JitState pointer (r15) as the
/// first parameter; the call's return value becomes the instruction's result.
void EmitX64::EmitGetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    reg_alloc.HostCall(inst);
    // Parameter 1: JitState*.
    code->mov(code->ABI_PARAM1, code->r15);

    code->SwitchMxcsrOnExit();
    code->CallFunction(&GetFpscrImpl);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
|
|
|
/// Host-side helper called from emitted code: forwards `value` to jit_state->SetFpscr().
static void SetFpscrImpl(u32 value, JitState* jit_state) {
    jit_state->SetFpscr(value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Emits a host call to SetFpscrImpl with argument 0 as the first parameter and
/// the JitState pointer (r15) as the second. No result is defined.
void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    reg_alloc.HostCall(nullptr, args[0]);
    // Parameter 2: JitState*.
    code->mov(code->ABI_PARAM2, code->r15);

    code->SwitchMxcsrOnExit();
    code->CallFunction(&SetFpscrImpl);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Loads JitState::FPSCR_nzcv into a scratch GPR and defines it as the result.
void EmitX64::EmitGetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    const Xbyak::Reg32 nzcv = reg_alloc.ScratchGpr().cvt32();
    code->mov(nzcv, dword[r15 + offsetof(JitState, FPSCR_nzcv)]);
    reg_alloc.DefineValue(inst, nzcv);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Stores argument 0 into JitState::FPSCR_nzcv.
void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg32 nzcv = reg_alloc.UseGpr(args[0]).cvt32();

    code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], nzcv);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Emits code that pushes a (location hash, code pointer) pair into the JitState
/// return stack buffer (RSB), unless that hash is already present in some slot.
///
/// Argument 0 must be an immediate holding the unique hash of the target
/// location. If a block for that hash has already been emitted, its entry point
/// is used as the code pointer; otherwise the return-from-run-code address is.
/// The position of the rcx load is recorded in patch_information under the hash.
void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    auto args = reg_alloc.GetArgumentInfo(inst);
    ASSERT(args[0].IsImmediate());
    const u64 target_hash = args[0].GetImmediateU64();

    const auto known_block = block_descriptors.find(target_hash);
    CodePtr target_code_ptr = known_block != block_descriptors.end()
                            ? known_block->second.entrypoint
                            : code->GetReturnFromRunCodeAddress();

    // The code pointer lives in RCX; EmitPatchMovRcx below is presumed to target that register.
    const Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
    const Xbyak::Reg64 hash_reg = reg_alloc.ScratchGpr();
    const Xbyak::Reg32 slot_reg = reg_alloc.ScratchGpr().cvt32();

    // slot = (rsb_ptr + 1) & (RSBSize - 1)
    code->mov(slot_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
    code->add(slot_reg, 1);
    code->and_(slot_reg, u32(JitState::RSBSize - 1));

    code->mov(hash_reg, target_hash);

    // Record where the rcx load starts before emitting it.
    patch_information[target_hash].mov_rcx.emplace_back(code->getCurr());
    EmitPatchMovRcx(target_code_ptr);

    // Skip the push entirely if any RSB slot already holds this hash.
    Xbyak::Label already_present;
    for (size_t slot = 0; slot < JitState::RSBSize; ++slot) {
        code->cmp(hash_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + slot * sizeof(u64)]);
        code->je(already_present, code->T_SHORT);
    }

    // Commit the new index, then fill the slot's hash and code pointer.
    code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], slot_reg);
    code->mov(qword[r15 + slot_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], hash_reg);
    code->mov(qword[r15 + slot_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
    code->L(already_present);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// GetCarryFromOp must never reach the emitter directly: emitters that produce a
/// carry erase the associated pseudo-operation themselves (see EmitMostSignificantWord).
/// Always asserts.
void EmitX64::EmitGetCarryFromOp(RegAlloc&, IR::Block&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// GetOverflowFromOp must never reach the emitter directly; presumably it is
/// consumed by the emitter of the instruction it is attached to. Always asserts.
void EmitX64::EmitGetOverflowFromOp(RegAlloc&, IR::Block&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// GetGEFromOp must never reach the emitter directly; presumably it is
/// consumed by the emitter of the instruction it is attached to. Always asserts.
void EmitX64::EmitGetGEFromOp(RegAlloc&, IR::Block&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Combines two 32-bit values into one 64-bit value: argument 0 becomes the low
/// word and argument 1 the high word.
void EmitX64::EmitPack2x32To1x64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 low_word = reg_alloc.UseScratchGpr(args[0]);
    const Xbyak::Reg64 high_word = reg_alloc.UseScratchGpr(args[1]);

    code->shl(high_word, 32);
    // A 32-bit self-move clears bits 63:32 of the low word before merging.
    code->mov(low_word.cvt32(), low_word.cvt32()); // Zero extend to 64-bits
    code->or_(low_word, high_word);

    reg_alloc.DefineValue(inst, low_word);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Defines the result as the argument itself; no code is emitted, so the upper
/// half of the source register is left as-is and consumers must ignore it.
void EmitX64::EmitLeastSignificantWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& source = args[0];
    reg_alloc.DefineValue(inst, source);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Produces the upper 32 bits of a 64-bit argument by shifting right by 32.
/// If a GetCarryFromOp pseudo-operation is attached, it is erased from the block
/// and defined from the host carry flag left by that shift.
void EmitX64::EmitMostSignificantWord(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 upper = reg_alloc.UseScratchGpr(args[0]);
    code->shr(upper, 32);
    reg_alloc.DefineValue(inst, upper);

    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
    if (!carry_inst) {
        return;
    }

    EraseInstruction(block, carry_inst);
    const Xbyak::Reg64 carry_out = reg_alloc.ScratchGpr();
    // setc writes only the low byte of the register.
    code->setc(carry_out.cvt8());
    reg_alloc.DefineValue(carry_inst, carry_out);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Defines the result as the argument itself; no code is emitted and the
/// register's higher bits are not cleared.
void EmitX64::EmitLeastSignificantHalf(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& source = args[0];
    reg_alloc.DefineValue(inst, source);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Defines the result as the argument itself; no code is emitted and the
/// register's higher bits are not cleared.
void EmitX64::EmitLeastSignificantByte(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& source = args[0];
    reg_alloc.DefineValue(inst, source);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Produces bit 31 of a 32-bit argument as a 0/1 value.
///
/// The shift is performed on the 32-bit sub-register. A 32-bit IR value may
/// carry stale data in bits 63:32 of its host register (EmitLeastSignificantWord
/// forwards a 64-bit register untouched), and a 64-bit shift by 31 would move
/// those bits into the result.
void EmitX64::EmitMostSignificantBit(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
    // TODO: Flag optimization
    code->shr(result, 31);
    reg_alloc.DefineValue(inst, result);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Produces 1 if the 32-bit argument is zero, otherwise 0.
///
/// Only the 32-bit sub-register is tested. A 32-bit IR value may carry stale
/// data in bits 63:32 of its host register (EmitLeastSignificantWord forwards a
/// 64-bit register untouched), so a 64-bit test could report "non-zero" for a
/// zero value. The 64-bit variant of this check is EmitIsZero64.
void EmitX64::EmitIsZero(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
    // TODO: Flag optimization
    code->test(result, result);
    code->sete(result.cvt8());
    // sete writes only the low byte; widen it so the result is a clean 0/1.
    code->movzx(result, result.cvt8());
    reg_alloc.DefineValue(inst, result);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
/// Produces 1 if the 64-bit argument is zero, otherwise 0.
void EmitX64::EmitIsZero64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 value = reg_alloc.UseScratchGpr(args[0]);
    const Xbyak::Reg8 low_byte = value.cvt8();
    // TODO: Flag optimization
    code->test(value, value);
    code->sete(low_byte);
    // sete writes only the low byte; widen it so the result is a clean 0/1.
    code->movzx(value, low_byte);
    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR LogicalShiftLeft.
//
// Arguments of `inst`: [0] value to shift, [1] shift amount (immediate, or a register of which
// only the low byte CL is examined), [2] carry-in.
// When a GetCarryFromOp pseudo-operation is attached to `inst`, it is erased from `block` and
// its value (the carry-out) is defined here alongside the shifted result.
void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& value_arg = args[0];
    auto& amount_arg = args[1];
    auto& carry_in_arg = args[2];

    // TODO: Consider using BMI2 instructions like SHLX when arm-in-host flags is implemented.

    if (!carry_inst) {
        // Nobody consumes the carry-out: only the shifted value is produced.
        if (amount_arg.IsImmediate()) {
            Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
            u8 amount = amount_arg.GetImmediateU8();

            if (amount >= 32) {
                // Every bit is shifted out.
                code->xor_(shifted, shifted);
            } else {
                code->shl(shifted, amount);
            }

            reg_alloc.DefineValue(inst, shifted);
            return;
        }

        reg_alloc.Use(amount_arg, HostLoc::RCX);
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
        Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32();

        // x64's 32-bit SHL reduces the count modulo 32 before shifting, whereas ARM does not:
        // on ARM any count above 31 yields zero. Shift first, then replace the result with
        // zero whenever CL >= 32.
        code->shl(shifted, code->cl);
        code->xor_(zero, zero);
        code->cmp(code->cl, 32);
        code->cmovnb(shifted, zero);

        reg_alloc.DefineValue(inst, shifted);
        return;
    }

    // The carry-out is consumed: it is produced here, so the pseudo-operation is dropped.
    EraseInstruction(block, carry_inst);

    if (amount_arg.IsImmediate()) {
        u8 amount = amount_arg.GetImmediateU8();
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
        Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_in_arg).cvt32();

        if (amount > 32) {
            // Both the value and the carry-out are zero.
            code->xor_(shifted, shifted);
            code->xor_(carry, carry);
        } else if (amount == 32) {
            // Carry-out is bit 0 of the original value; the value itself becomes zero.
            code->mov(carry, shifted);
            code->xor_(shifted, shifted);
            code->and_(carry, 1);
        } else if (amount != 0) {
            // 1..31: the host shift leaves the last bit shifted out in CF.
            code->bt(carry, 0);
            code->shl(shifted, amount);
            code->setc(carry.cvt8());
        }
        // amount == 0: value and carry pass through unchanged.

        reg_alloc.DefineValue(inst, shifted);
        reg_alloc.DefineValue(carry_inst, carry);
        return;
    }

    reg_alloc.Use(amount_arg, HostLoc::RCX);
    Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
    Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_in_arg).cvt32();

    // TODO: Optimize this.

    code->inLocalLabel();

    code->cmp(code->cl, 32);
    code->ja(".Rs_gt32");
    code->je(".Rs_eq32");

    // Case CL < 32.
    // CF is preloaded with the carry-in so that a count of zero (for which SHL leaves the
    // flags alone) reports the carry-in unchanged.
    code->bt(carry, 0);
    code->shl(shifted, code->cl);
    code->setc(carry.cvt8());
    code->jmp(".end");

    // Case CL > 32: value and carry-out are both zero.
    code->L(".Rs_gt32");
    code->xor_(shifted, shifted);
    code->xor_(carry, carry);
    code->jmp(".end");

    // Case CL == 32: carry-out is bit 0 of the original value, the value becomes zero.
    code->L(".Rs_eq32");
    code->mov(carry, shifted);
    code->and_(carry, 1);
    code->xor_(shifted, shifted);

    code->L(".end");

    code->outLocalLabel();

    reg_alloc.DefineValue(inst, shifted);
    reg_alloc.DefineValue(carry_inst, carry);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR LogicalShiftRight.
//
// Arguments of `inst`: [0] value to shift, [1] shift amount (immediate, or a register of which
// only the low byte CL is examined), [2] carry-in.
// When a GetCarryFromOp pseudo-operation is attached to `inst`, it is erased from `block` and
// its value (the carry-out) is defined here alongside the shifted result.
void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& value_arg = args[0];
    auto& amount_arg = args[1];
    auto& carry_in_arg = args[2];

    if (!carry_inst) {
        // Nobody consumes the carry-out: only the shifted value is produced.
        if (amount_arg.IsImmediate()) {
            Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
            u8 amount = amount_arg.GetImmediateU8();

            if (amount >= 32) {
                // Every bit is shifted out.
                code->xor_(shifted, shifted);
            } else {
                code->shr(shifted, amount);
            }

            reg_alloc.DefineValue(inst, shifted);
            return;
        }

        reg_alloc.Use(amount_arg, HostLoc::RCX);
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
        Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32();

        // x64's 32-bit SHR reduces the count modulo 32 before shifting, whereas ARM does not:
        // on ARM any count above 31 yields zero. Shift first, then replace the result with
        // zero whenever CL >= 32.
        code->shr(shifted, code->cl);
        code->xor_(zero, zero);
        code->cmp(code->cl, 32);
        code->cmovnb(shifted, zero);

        reg_alloc.DefineValue(inst, shifted);
        return;
    }

    // The carry-out is consumed: it is produced here, so the pseudo-operation is dropped.
    EraseInstruction(block, carry_inst);

    if (amount_arg.IsImmediate()) {
        u8 amount = amount_arg.GetImmediateU8();
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
        Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_in_arg).cvt32();

        if (amount > 32) {
            // Both the value and the carry-out are zero.
            code->xor_(shifted, shifted);
            code->xor_(carry, carry);
        } else if (amount == 32) {
            // Carry-out is bit 31 of the original value; the value itself becomes zero.
            code->bt(shifted, 31);
            code->setc(carry.cvt8());
            code->mov(shifted, 0);
        } else if (amount != 0) {
            // 1..31: the host shift leaves the last bit shifted out in CF.
            code->shr(shifted, amount);
            code->setc(carry.cvt8());
        }
        // amount == 0: value and carry pass through unchanged.

        reg_alloc.DefineValue(inst, shifted);
        reg_alloc.DefineValue(carry_inst, carry);
        return;
    }

    reg_alloc.Use(amount_arg, HostLoc::RCX);
    Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
    Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_in_arg).cvt32();

    // TODO: Optimize this.

    code->inLocalLabel();

    code->cmp(code->cl, 32);
    code->ja(".Rs_gt32");
    code->je(".Rs_eq32");

    // Case CL == 0: value and carry are left untouched.
    code->test(code->cl, code->cl);
    code->jz(".end");

    // Case 0 < CL < 32: the host shift leaves the last bit shifted out in CF.
    code->shr(shifted, code->cl);
    code->setc(carry.cvt8());
    code->jmp(".end");

    // Case CL > 32: value and carry-out are both zero.
    code->L(".Rs_gt32");
    code->xor_(shifted, shifted);
    code->xor_(carry, carry);
    code->jmp(".end");

    // Case CL == 32: carry-out is bit 31 of the original value, the value becomes zero.
    code->L(".Rs_eq32");
    code->bt(shifted, 31);
    code->setc(carry.cvt8());
    code->xor_(shifted, shifted);

    code->L(".end");

    code->outLocalLabel();

    reg_alloc.DefineValue(inst, shifted);
    reg_alloc.DefineValue(carry_inst, carry);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 64-bit IR LogicalShiftRight64.
//
// Only immediate shift amounts below 64 are supported; anything else trips an assertion.
void EmitX64::EmitLogicalShiftRight64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto arguments = reg_alloc.GetArgumentInfo(inst);
    auto& value_arg = arguments[0];
    auto& amount_arg = arguments[1];

    ASSERT_MSG(amount_arg.IsImmediate(), "variable 64 bit shifts are not implemented");
    ASSERT_MSG(amount_arg.GetImmediateU8() < 64, "shift width clamping is not implemented");

    Xbyak::Reg64 shifted = reg_alloc.UseScratchGpr(value_arg);
    u8 amount = amount_arg.GetImmediateU8();

    code->shr(shifted, amount);

    reg_alloc.DefineValue(inst, shifted);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR ArithmeticShiftRight.
//
// Arguments of `inst`: [0] value to shift, [1] shift amount (immediate, or a register of which
// only the low byte CL is examined), [2] carry-in.
// When a GetCarryFromOp pseudo-operation is attached to `inst`, it is erased from `block` and
// its value (the carry-out) is defined here alongside the shifted result.
void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& value_arg = args[0];
    auto& amount_arg = args[1];
    auto& carry_in_arg = args[2];

    if (!carry_inst) {
        // Nobody consumes the carry-out: only the shifted value is produced.
        if (amount_arg.IsImmediate()) {
            u8 amount = amount_arg.GetImmediateU8();
            Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();

            // Counts above 31 behave like a count of 31, so saturate.
            u8 clamped = amount;
            if (clamped > 31) {
                clamped = 31;
            }
            code->sar(shifted, clamped);

            reg_alloc.DefineValue(inst, shifted);
            return;
        }

        reg_alloc.UseScratch(amount_arg, HostLoc::RCX);
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
        Xbyak::Reg32 const31 = reg_alloc.ScratchGpr().cvt32();

        // x64's 32-bit SAR reduces the count modulo 32 before shifting; ARM does not.
        // Since every count above 31 gives the same result as 31, the count (the low byte of
        // RCX, zero-extended) is saturated to 31 before the shift.
        code->mov(const31, 31);
        code->movzx(code->ecx, code->cl);
        code->cmp(code->ecx, u32(31));
        code->cmovg(code->ecx, const31);
        code->sar(shifted, code->cl);

        reg_alloc.DefineValue(inst, shifted);
        return;
    }

    // The carry-out is consumed: it is produced here, so the pseudo-operation is dropped.
    EraseInstruction(block, carry_inst);

    if (amount_arg.IsImmediate()) {
        u8 amount = amount_arg.GetImmediateU8();
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
        Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_in_arg).cvt8();

        if (amount > 31) {
            // The value becomes 32 copies of the sign bit; the carry-out is that same bit.
            code->sar(shifted, 31);
            code->bt(shifted, 31);
            code->setc(carry);
        } else if (amount != 0) {
            // 1..31: the host shift leaves the last bit shifted out in CF.
            code->sar(shifted, amount);
            code->setc(carry);
        }
        // amount == 0: value and carry pass through unchanged.

        reg_alloc.DefineValue(inst, shifted);
        reg_alloc.DefineValue(carry_inst, carry);
        return;
    }

    reg_alloc.Use(amount_arg, HostLoc::RCX);
    Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value_arg).cvt32();
    Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_in_arg).cvt8();

    // TODO: Optimize this.

    code->inLocalLabel();

    code->cmp(code->cl, u32(31));
    code->ja(".Rs_gt31");

    // Case CL == 0: value and carry are left untouched.
    code->test(code->cl, code->cl);
    code->jz(".end");

    // Case 0 < CL <= 31: the host shift leaves the last bit shifted out in CF.
    code->sar(shifted, code->cl);
    code->setc(carry);
    code->jmp(".end");

    // Case CL > 31.
    code->L(".Rs_gt31");
    code->sar(shifted, 31); // 31 produces the same results as anything above 31
    code->bt(shifted, 31);
    code->setc(carry);

    code->L(".end");

    code->outLocalLabel();

    reg_alloc.DefineValue(inst, shifted);
    reg_alloc.DefineValue(carry_inst, carry);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR RotateRight.
//
// Arguments of `inst`: [0] value to rotate, [1] rotate amount (immediate, or a register of which
// only the low byte CL is examined), [2] carry-in.
// When a GetCarryFromOp pseudo-operation is attached to `inst`, it is erased from `block` and
// its value (the carry-out) is defined here alongside the rotated result.
void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& value_arg = args[0];
    auto& amount_arg = args[1];
    auto& carry_in_arg = args[2];

    if (!carry_inst) {
        // Nobody consumes the carry-out: only the rotated value is produced.
        if (amount_arg.IsImmediate()) {
            u8 amount = amount_arg.GetImmediateU8();
            Xbyak::Reg32 rotated = reg_alloc.UseScratchGpr(value_arg).cvt32();

            code->ror(rotated, u8(amount & 0x1F));

            reg_alloc.DefineValue(inst, rotated);
            return;
        }

        reg_alloc.Use(amount_arg, HostLoc::RCX);
        Xbyak::Reg32 rotated = reg_alloc.UseScratchGpr(value_arg).cvt32();

        // x64 ROR instruction does (shift & 0x1F) for us.
        code->ror(rotated, code->cl);

        reg_alloc.DefineValue(inst, rotated);
        return;
    }

    // The carry-out is consumed: it is produced here, so the pseudo-operation is dropped.
    EraseInstruction(block, carry_inst);

    if (amount_arg.IsImmediate()) {
        u8 amount = amount_arg.GetImmediateU8();
        Xbyak::Reg32 rotated = reg_alloc.UseScratchGpr(value_arg).cvt32();
        Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_in_arg).cvt8();

        if (amount != 0) {
            if ((amount & 0x1F) != 0) {
                code->ror(rotated, amount);
            } else {
                // A non-zero multiple of 32 leaves the value as it is; bit 31 is loaded
                // into CF so that it becomes the carry-out.
                code->bt(rotated, u8(31));
            }
            code->setc(carry);
        }
        // amount == 0: value and carry pass through unchanged.

        reg_alloc.DefineValue(inst, rotated);
        reg_alloc.DefineValue(carry_inst, carry);
        return;
    }

    reg_alloc.UseScratch(amount_arg, HostLoc::RCX);
    Xbyak::Reg32 rotated = reg_alloc.UseScratchGpr(value_arg).cvt32();
    Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_in_arg).cvt8();

    // TODO: Optimize

    code->inLocalLabel();

    // Case CL == 0: value and carry are left untouched.
    code->test(code->cl, code->cl);
    code->jz(".end");

    // Reduce the count to its low five bits; ZF tells whether that leaves zero.
    code->and_(code->ecx, u32(0x1F));
    code->jz(".zero_1F");

    // Case (CL & 0x1F) != 0: rotate and take the carry-out from CF.
    code->ror(rotated, code->cl);
    code->setc(carry);
    code->jmp(".end");

    // Case (CL & 0x1F) == 0 with CL != 0: value unchanged, carry-out is bit 31.
    code->L(".zero_1F");
    code->bt(rotated, u8(31));
    code->setc(carry);

    code->L(".end");

    code->outLocalLabel();

    reg_alloc.DefineValue(inst, rotated);
    reg_alloc.DefineValue(carry_inst, carry);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR RotateRightExtended: a one-bit rotate right through carry.
//
// Arguments of `inst`: [0] value to rotate, [1] carry-in.
// When a GetCarryFromOp pseudo-operation is attached to `inst`, it is erased from `block` and
// its value (the carry-out) is defined here as well.
void EmitX64::EmitRotateRightExtended(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto arguments = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 rotated = reg_alloc.UseScratchGpr(arguments[0]).cvt32();
    Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(arguments[1]).cvt8();

    // Load the carry-in into CF, then rotate through CF by one position.
    code->bt(carry.cvt32(), 0);
    code->rcr(rotated, 1);

    reg_alloc.DefineValue(inst, rotated);

    if (!carry_inst) {
        return;
    }

    EraseInstruction(block, carry_inst);

    // CF still holds the bit rotated out by RCR.
    code->setc(carry);

    reg_alloc.DefineValue(carry_inst, carry);
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Placeholder register (constructed with index -1). It stands in where a register variable is
// required but no host register is allocated, e.g. for a carry or overflow output that no
// instruction consumes.
const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1);
|
|
|
|
|
2017-02-24 22:25:31 +01:00
|
|
|
// Chooses the 8-bit register that serves as carry-in and/or carry-out of an arithmetic emitter.
//
// carry_in  - the carry-in argument of the instruction being emitted.
// carry_out - the instruction that consumes the carry-out, or null if there is none.
//
// With a carry-out consumer the returned register is writable: a fresh scratch register when
// the carry-in is an immediate, otherwise the carry-in itself taken as scratch.
// Without a consumer the carry-in is only read: it is returned as a plain use, or INVALID_REG
// is returned when the carry-in is an immediate.
static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) {
    const bool carry_in_is_constant = carry_in.IsImmediate();

    if (carry_out) {
        if (carry_in_is_constant) {
            return reg_alloc.ScratchGpr().cvt8();
        }
        return reg_alloc.UseScratchGpr(carry_in).cvt8();
    }

    if (carry_in_is_constant) {
        return INVALID_REG.cvt8();
    }
    return reg_alloc.UseGpr(carry_in).cvt8();
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR AddWithCarry: result = args[0] + args[1] + carry-in.
//
// Arguments of `inst`: [0] first addend, [1] second addend (immediate or not), [2] carry-in
// (immediate or not).
// Attached GetCarryFromOp / GetOverflowFromOp pseudo-operations are erased from `block` and
// defined here from the host CF / OF left by the addition.
void EmitX64::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& carry_in = args[2];

    Xbyak::Reg32 sum = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst);
    Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8();

    // TODO: Consider using LEA.

    if (args[1].IsImmediate()) {
        u32 addend = args[1].GetImmediateU32();

        if (!carry_in.IsImmediate()) {
            // Runtime carry-in: copy its bit 0 into CF and add with carry.
            code->bt(carry.cvt32(), 0);
            code->adc(sum, addend);
        } else if (carry_in.GetImmediateU1()) {
            // Carry-in known to be set.
            code->stc();
            code->adc(sum, addend);
        } else {
            // Carry-in known to be clear: a plain ADD suffices.
            code->add(sum, addend);
        }
    } else {
        OpArg addend = reg_alloc.UseOpArg(args[1]);
        addend.setBit(32);

        if (!carry_in.IsImmediate()) {
            // Runtime carry-in: copy its bit 0 into CF and add with carry.
            code->bt(carry.cvt32(), 0);
            code->adc(sum, *addend);
        } else if (carry_in.GetImmediateU1()) {
            // Carry-in known to be set.
            code->stc();
            code->adc(sum, *addend);
        } else {
            // Carry-in known to be clear: a plain ADD suffices.
            code->add(sum, *addend);
        }
    }

    reg_alloc.DefineValue(inst, sum);

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        code->setc(carry);
        reg_alloc.DefineValue(carry_inst, carry);
    }

    if (overflow_inst) {
        EraseInstruction(block, overflow_inst);
        code->seto(overflow);
        reg_alloc.DefineValue(overflow_inst, overflow);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 64-bit IR Add64: result = args[0] + args[1].
void EmitX64::EmitAdd64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto arguments = reg_alloc.GetArgumentInfo(inst);

    // The first operand is taken as scratch and accumulates the sum.
    Xbyak::Reg64 sum = reg_alloc.UseScratchGpr(arguments[0]);
    Xbyak::Reg64 addend = reg_alloc.UseGpr(arguments[1]);

    code->add(sum, addend);

    reg_alloc.DefineValue(inst, sum);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR SubWithCarry.
//
// Arguments of `inst`: [0] minuend, [1] subtrahend (immediate or not), [2] carry-in
// (immediate or not).
// Attached GetCarryFromOp / GetOverflowFromOp pseudo-operations are erased from `block` and
// defined here from the host flags left by the subtraction.
void EmitX64::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& carry_in = args[2];

    Xbyak::Reg32 difference = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst);
    Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8();

    // TODO: Consider using LEA.
    // TODO: Optimize CMP case.
    // Note that x64 CF is inverse of what the ARM carry flag is here.

    if (args[1].IsImmediate()) {
        u32 subtrahend = args[1].GetImmediateU32();

        if (!carry_in.IsImmediate()) {
            // Runtime carry-in: load its bit 0 into CF, invert it, then subtract with borrow.
            code->bt(carry.cvt32(), 0);
            code->cmc();
            code->sbb(difference, subtrahend);
        } else if (carry_in.GetImmediateU1()) {
            // Carry-in known to be set: a plain SUB suffices.
            code->sub(difference, subtrahend);
        } else {
            // Carry-in known to be clear: subtract with the host borrow set.
            code->stc();
            code->sbb(difference, subtrahend);
        }
    } else {
        OpArg subtrahend = reg_alloc.UseOpArg(args[1]);
        subtrahend.setBit(32);

        if (!carry_in.IsImmediate()) {
            // Runtime carry-in: load its bit 0 into CF, invert it, then subtract with borrow.
            code->bt(carry.cvt32(), 0);
            code->cmc();
            code->sbb(difference, *subtrahend);
        } else if (carry_in.GetImmediateU1()) {
            // Carry-in known to be set: a plain SUB suffices.
            code->sub(difference, *subtrahend);
        } else {
            // Carry-in known to be clear: subtract with the host borrow set.
            code->stc();
            code->sbb(difference, *subtrahend);
        }
    }

    reg_alloc.DefineValue(inst, difference);

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        // The carry-out is the inverse of the host CF.
        code->setnc(carry);
        reg_alloc.DefineValue(carry_inst, carry);
    }

    if (overflow_inst) {
        EraseInstruction(block, overflow_inst);
        code->seto(overflow);
        reg_alloc.DefineValue(overflow_inst, overflow);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 64-bit IR Sub64: result = args[0] - args[1].
void EmitX64::EmitSub64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto arguments = reg_alloc.GetArgumentInfo(inst);

    // The first operand is taken as scratch and receives the difference.
    Xbyak::Reg64 difference = reg_alloc.UseScratchGpr(arguments[0]);
    Xbyak::Reg64 subtrahend = reg_alloc.UseGpr(arguments[1]);

    code->sub(difference, subtrahend);

    reg_alloc.DefineValue(inst, difference);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR Mul: result = args[0] * args[1], using the 32-bit IMUL.
void EmitX64::EmitMul(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto arguments = reg_alloc.GetArgumentInfo(inst);
    auto& multiplier_arg = arguments[1];

    Xbyak::Reg32 product = reg_alloc.UseScratchGpr(arguments[0]).cvt32();

    if (!multiplier_arg.IsImmediate()) {
        OpArg multiplier = reg_alloc.UseOpArg(multiplier_arg);
        multiplier.setBit(32);

        code->imul(product, *multiplier);
    } else {
        // Three-operand form: product = product * immediate.
        code->imul(product, product, multiplier_arg.GetImmediateU32());
    }

    reg_alloc.DefineValue(inst, product);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 64-bit IR Mul64: result = args[0] * args[1], using the 64-bit IMUL.
void EmitX64::EmitMul64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto arguments = reg_alloc.GetArgumentInfo(inst);

    // The first operand is taken as scratch and receives the product.
    Xbyak::Reg64 product = reg_alloc.UseScratchGpr(arguments[0]);
    OpArg multiplier = reg_alloc.UseOpArg(arguments[1]);

    code->imul(product, *multiplier);

    reg_alloc.DefineValue(inst, product);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR And: result = args[0] & args[1].
void EmitX64::EmitAnd(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto arguments = reg_alloc.GetArgumentInfo(inst);
    auto& rhs_arg = arguments[1];

    // The first operand is taken as scratch and combined in place.
    Xbyak::Reg32 accumulator = reg_alloc.UseScratchGpr(arguments[0]).cvt32();

    if (!rhs_arg.IsImmediate()) {
        OpArg rhs = reg_alloc.UseOpArg(rhs_arg);
        rhs.setBit(32);

        code->and_(accumulator, *rhs);
    } else {
        u32 rhs = rhs_arg.GetImmediateU32();

        code->and_(accumulator, rhs);
    }

    reg_alloc.DefineValue(inst, accumulator);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR Eor: result = args[0] ^ args[1].
void EmitX64::EmitEor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto arguments = reg_alloc.GetArgumentInfo(inst);
    auto& rhs_arg = arguments[1];

    // The first operand is taken as scratch and combined in place.
    Xbyak::Reg32 accumulator = reg_alloc.UseScratchGpr(arguments[0]).cvt32();

    if (!rhs_arg.IsImmediate()) {
        OpArg rhs = reg_alloc.UseOpArg(rhs_arg);
        rhs.setBit(32);

        code->xor_(accumulator, *rhs);
    } else {
        u32 rhs = rhs_arg.GetImmediateU32();

        code->xor_(accumulator, rhs);
    }

    reg_alloc.DefineValue(inst, accumulator);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for the 32-bit IR Or: result = args[0] | args[1].
void EmitX64::EmitOr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto arguments = reg_alloc.GetArgumentInfo(inst);
    auto& rhs_arg = arguments[1];

    // The first operand is taken as scratch and combined in place.
    Xbyak::Reg32 accumulator = reg_alloc.UseScratchGpr(arguments[0]).cvt32();

    if (!rhs_arg.IsImmediate()) {
        OpArg rhs = reg_alloc.UseOpArg(rhs_arg);
        rhs.setBit(32);

        code->or_(accumulator, *rhs);
    } else {
        u32 rhs = rhs_arg.GetImmediateU32();

        code->or_(accumulator, rhs);
    }

    reg_alloc.DefineValue(inst, accumulator);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 32-bit bitwise NOT of the instruction's argument.
// An immediate argument is complemented at emit time and simply loaded;
// otherwise the value is complemented in a scratch register.
void EmitX64::EmitNot(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    if (args[0].IsImmediate()) {
        const Xbyak::Reg32 folded = reg_alloc.ScratchGpr().cvt32();
        code->mov(folded, u32(~args[0].GetImmediateU32()));
        reg_alloc.DefineValue(inst, folded);
        return;
    }

    const Xbyak::Reg32 value = reg_alloc.UseScratchGpr(args[0]).cvt32();
    code->not_(value);
    reg_alloc.DefineValue(inst, value);
}
|
2016-07-16 20:23:42 +02:00
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Sign-extends the low 32 bits of the argument to 64 bits, in place in a scratch register.
void EmitX64::EmitSignExtendWordToLong(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 value = reg_alloc.UseScratchGpr(args[0]);

    const Xbyak::Reg64 wide = value.cvt64();
    const Xbyak::Reg32 narrow = value.cvt32();
    code->movsxd(wide, narrow);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Sign-extends the low 16 bits of the argument to 32 bits, in place in a scratch register.
void EmitX64::EmitSignExtendHalfToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 value = reg_alloc.UseScratchGpr(args[0]);

    const Xbyak::Reg32 wide = value.cvt32();
    const Xbyak::Reg16 narrow = value.cvt16();
    code->movsx(wide, narrow);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Sign-extends the low 8 bits of the argument to 32 bits, in place in a scratch register.
void EmitX64::EmitSignExtendByteToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 value = reg_alloc.UseScratchGpr(args[0]);

    const Xbyak::Reg32 wide = value.cvt32();
    const Xbyak::Reg8 narrow = value.cvt8();
    code->movsx(wide, narrow);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Zero-extends the low 32 bits of the argument to 64 bits, in place in a scratch register.
void EmitX64::EmitZeroExtendWordToLong(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 value = reg_alloc.UseScratchGpr(args[0]);

    // A 32-bit register-to-register move clears the upper 32 bits on x64,
    // so moving the register onto itself performs the zero-extension.
    const Xbyak::Reg32 low_half = value.cvt32();
    code->mov(low_half, low_half);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Zero-extends the low 16 bits of the argument to 32 bits, in place in a scratch register.
void EmitX64::EmitZeroExtendHalfToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 value = reg_alloc.UseScratchGpr(args[0]);

    const Xbyak::Reg32 wide = value.cvt32();
    const Xbyak::Reg16 narrow = value.cvt16();
    code->movzx(wide, narrow);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Zero-extends the low 8 bits of the argument to 32 bits, in place in a scratch register.
void EmitX64::EmitZeroExtendByteToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 value = reg_alloc.UseScratchGpr(args[0]);

    const Xbyak::Reg32 wide = value.cvt32();
    const Xbyak::Reg8 narrow = value.cvt8();
    code->movzx(wide, narrow);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Reverses the byte order of a 32-bit value using BSWAP.
void EmitX64::EmitByteReverseWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg32 word = reg_alloc.UseScratchGpr(args[0]).cvt32();
    code->bswap(word);

    reg_alloc.DefineValue(inst, word);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Swaps the two bytes of a 16-bit value by rotating the 16-bit register by 8.
void EmitX64::EmitByteReverseHalf(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg16 half = reg_alloc.UseScratchGpr(args[0]).cvt16();
    code->rol(half, 8);

    reg_alloc.DefineValue(inst, half);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Reverses the byte order of a 64-bit value using BSWAP.
void EmitX64::EmitByteReverseDual(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg64 dword = reg_alloc.UseScratchGpr(args[0]);
    code->bswap(dword);

    reg_alloc.DefineValue(inst, dword);
}
|
2016-07-16 20:23:42 +02:00
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 32-bit count-leading-zeros.
// Uses LZCNT when the host CPU reports it; otherwise synthesises the count from BSR.
void EmitX64::EmitCountLeadingZeros(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    if (!cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
        const Xbyak::Reg32 input = reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 count = reg_alloc.ScratchGpr().cvt32();

        // BSR leaves its destination undefined for a zero input, but it does set ZF.
        // In that case substitute -1, so that 31 - (-1) yields 32.
        code->bsr(count, input);
        code->mov(input, 0xFFFFFFFF);
        code->cmovz(count, input);
        // count = 31 - (index of highest set bit)
        code->neg(count);
        code->add(count, 31);

        reg_alloc.DefineValue(inst, count);
        return;
    }

    const Xbyak::Reg32 input = reg_alloc.UseGpr(args[0]).cvt32();
    const Xbyak::Reg32 count = reg_alloc.ScratchGpr().cvt32();

    code->lzcnt(count, input);

    reg_alloc.DefineValue(inst, count);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 32-bit signed saturating add.
// If a GetOverflowFromOp pseudo-operation is attached, it is erased from the block
// and defined from the host overflow flag of the add.
void EmitX64::EmitSignedSaturatedAdd(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg32 sum = reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 rhs = reg_alloc.UseGpr(args[1]).cvt32();
    const Xbyak::Reg32 saturated = reg_alloc.ScratchGpr().cvt32();

    // Precompute the saturation value from the sign of the first operand:
    // 0x7FFFFFFF if it is non-negative, 0x80000000 if it is negative.
    code->mov(saturated, sum);
    code->shr(saturated, 31);
    code->add(saturated, 0x7FFFFFFF);

    // Do the add and substitute the saturation value if it overflowed.
    code->add(sum, rhs);
    code->cmovo(sum, saturated);

    reg_alloc.DefineValue(inst, sum);

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);

    // CMOV does not modify flags, so OF still reflects the add above.
    code->seto(saturated.cvt8());

    reg_alloc.DefineValue(overflow_inst, saturated);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 32-bit signed saturating subtract.
// If a GetOverflowFromOp pseudo-operation is attached, it is erased from the block
// and defined from the host overflow flag of the subtract.
void EmitX64::EmitSignedSaturatedSub(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg32 difference = reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 rhs = reg_alloc.UseGpr(args[1]).cvt32();
    const Xbyak::Reg32 saturated = reg_alloc.ScratchGpr().cvt32();

    // Precompute the saturation value from the sign of the first operand:
    // 0x7FFFFFFF if it is non-negative, 0x80000000 if it is negative.
    code->mov(saturated, difference);
    code->shr(saturated, 31);
    code->add(saturated, 0x7FFFFFFF);

    // Do the subtract and substitute the saturation value if it overflowed.
    code->sub(difference, rhs);
    code->cmovo(difference, saturated);

    reg_alloc.DefineValue(inst, difference);

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);

    // CMOV does not modify flags, so OF still reflects the subtract above.
    code->seto(saturated.cvt8());

    reg_alloc.DefineValue(overflow_inst, saturated);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits an unsigned saturation of a signed 32-bit value to N bits, i.e. clamps it
// to [0, 2^N - 1]. N is the immediate second argument and must be at most 31.
// If a GetOverflowFromOp pseudo-operation is attached, it is erased from the block
// and defined to indicate whether clamping took place.
void EmitX64::EmitUnsignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N <= 31);

    const u32 upper_bound = (1u << N) - 1;

    const Xbyak::Reg32 clamped = reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 input = reg_alloc.UseGpr(args[0]).cvt32();
    const Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32();

    // clamped = clamp(input, 0, upper_bound)
    // `overflow` doubles as the zero constant for the lower clamp.
    code->xor_(overflow, overflow);
    code->cmp(input, upper_bound);
    code->mov(clamped, upper_bound);
    code->cmovle(clamped, overflow); // signed input <= upper_bound: provisionally 0
    code->cmovbe(clamped, input);    // unsigned input <= upper_bound: in range, keep input

    reg_alloc.DefineValue(inst, clamped);

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);

    // MOV/CMOV leave flags untouched, so this still tests the CMP above:
    // unsigned-above means the input was outside [0, upper_bound].
    code->seta(overflow.cvt8());

    reg_alloc.DefineValue(overflow_inst, overflow);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a signed saturation of a 32-bit value to N bits, i.e. clamps it to
// [-2^(N-1), 2^(N-1) - 1]. N is the immediate second argument, in the range [1, 32].
// If a GetOverflowFromOp pseudo-operation is attached, it is defined to indicate
// whether clamping took place.
void EmitX64::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N >= 1 && N <= 32);

    if (N == 32) {
        // Saturating to the full width is the identity and can never overflow.
        reg_alloc.DefineValue(inst, args[0]);
        if (overflow_inst) {
            auto no_overflow = IR::Value(false);
            overflow_inst->ReplaceUsesWith(no_overflow);
        }
        return;
    }

    const u32 range_mask = (1u << N) - 1;
    const u32 max_value = (1u << (N - 1)) - 1;
    const u32 min_value_unextended = 1u << (N - 1);
    const u32 min_value = Common::SignExtend(N, min_value_unextended);

    const Xbyak::Reg32 clamped = reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 input = reg_alloc.UseGpr(args[0]).cvt32();
    const Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 max_reg = reg_alloc.ScratchGpr().cvt32();

    // Bias the input by 2^(N-1): the biased value lies in [0, range_mask]
    // exactly when the input was already within the representable range.
    code->lea(overflow, code->ptr[input.cvt64() + min_value_unextended]);

    // Select the saturated value matching the direction of a potential overflow.
    code->cmp(input, max_value);
    code->mov(max_reg, max_value);
    code->mov(clamped, min_value);
    code->cmovg(clamped, max_reg);

    // Keep the original input if it was in range.
    code->cmp(overflow, range_mask);
    code->cmovbe(clamped, input);

    reg_alloc.DefineValue(inst, clamped);

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);

    // CMOV leaves flags untouched, so this still tests the range comparison above.
    code->seta(overflow.cvt8());

    reg_alloc.DefineValue(overflow_inst, overflow);
}
|
|
|
|
|
2016-12-20 23:05:51 +01:00
|
|
|
/**
|
|
|
|
* Extracts the most significant bits from each of the packed bytes, and packs them together.
|
|
|
|
*
|
|
|
|
* value before: a-------b-------c-------d-------
|
|
|
|
* value after: 0000000000000000000000000000abcd
|
|
|
|
*
|
|
|
|
* @param value The register containing the value to operate on. Result will be stored in the same register.
|
|
|
|
* @param a_tmp A register which can be used as a scratch register.
|
|
|
|
*/
|
2016-12-18 17:25:41 +01:00
|
|
|
static void ExtractMostSignificantBitFromPackedBytes(const Xbyak::util::Cpu& cpu_info, BlockOfCode* code, RegAlloc& reg_alloc, Xbyak::Reg32 value, boost::optional<Xbyak::Reg32> a_tmp = boost::none) {
|
|
|
|
if (cpu_info.has(Xbyak::util::Cpu::tBMI2)) {
|
|
|
|
Xbyak::Reg32 tmp = a_tmp ? *a_tmp : reg_alloc.ScratchGpr().cvt32();
|
|
|
|
code->mov(tmp, 0x80808080);
|
|
|
|
code->pext(value, value, tmp);
|
|
|
|
} else {
|
|
|
|
code->and_(value, 0x80808080);
|
|
|
|
code->imul(value, value, 0x00204081);
|
|
|
|
code->shr(value, 28);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-20 23:06:14 +01:00
|
|
|
/**
|
|
|
|
* Extracts the most significant bits from each of the packed words, duplicates them, and packs them together.
|
|
|
|
*
|
|
|
|
* value before: a---------------b---------------
|
|
|
|
* value after: 0000000000000000000000000000aabb
|
|
|
|
*
|
|
|
|
* @param value The register containing the value to operate on. Result will be stored in the same register.
|
|
|
|
*/
|
2016-12-18 17:25:41 +01:00
|
|
|
static void ExtractAndDuplicateMostSignificantBitFromPackedWords(BlockOfCode* code, Xbyak::Reg32 value) {
|
|
|
|
code->and_(value, 0x80008000);
|
|
|
|
code->shr(value, 1);
|
|
|
|
code->imul(value, value, 0xC003);
|
|
|
|
code->shr(value, 28);
|
|
|
|
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise wrapping add of packed unsigned bytes.
// If a GetGEFromOp pseudo-operation is attached, it is erased from the block and
// defined as a 4-bit mask with one bit per byte lane, set where the add carried out.
void EmitX64::EmitPackedAddU8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm sum = reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm rhs = reg_alloc.UseXmm(args[1]);

    code->paddb(sum, rhs);

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        const Xbyak::Reg32 ge_bits = reg_alloc.ScratchGpr().cvt32();
        const Xbyak::Xmm no_carry = reg_alloc.ScratchXmm();

        // A lane did not carry iff rhs <= sum (unsigned), i.e. min(sum, rhs) == rhs.
        code->movdqa(no_carry, sum);
        code->pminub(no_carry, rhs);
        code->pcmpeqb(no_carry, rhs);
        code->movd(ge_bits, no_carry);
        // Invert to obtain the lanes that did carry.
        code->not_(ge_bits);

        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, ge_bits);
        reg_alloc.DefineValue(ge_inst, ge_bits);
    }

    reg_alloc.DefineValue(inst, sum);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise wrapping add of packed signed bytes.
// If a GetGEFromOp pseudo-operation is attached, it is erased from the block and
// defined as a 4-bit mask with one bit per byte lane, set where the saturated
// signed sum of that lane is non-negative.
void EmitX64::EmitPackedAddS8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    Xbyak::Reg32 ge_bits;

    const Xbyak::Xmm sum = reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm rhs = reg_alloc.UseXmm(args[1]);

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        const Xbyak::Xmm saturated = reg_alloc.ScratchXmm();
        ge_bits = reg_alloc.ScratchGpr().cvt32();

        // The saturating sum keeps the sign of the true sum; this has to be captured
        // before the first operand is overwritten by the wrapping add below.
        code->movdqa(saturated, sum);
        code->paddsb(saturated, rhs);
        code->movd(ge_bits, saturated);
    }

    code->paddb(sum, rhs);

    if (ge_inst) {
        // Inverted sign bit == "lane is non-negative".
        code->not_(ge_bits);
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, ge_bits);
        reg_alloc.DefineValue(ge_inst, ge_bits);
    }

    reg_alloc.DefineValue(inst, sum);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise wrapping add of packed unsigned 16-bit words.
// If a GetGEFromOp pseudo-operation is attached, it is erased from the block and
// defined as a 4-bit mask (two identical bits per word lane), set where the add carried out.
void EmitX64::EmitPackedAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);

    code->paddw(xmm_a, xmm_b);

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        Xbyak::Reg32 reg_ge = reg_alloc.ScratchGpr().cvt32();
        Xbyak::Xmm tmp = reg_alloc.ScratchXmm();

        // Build a per-word mask that is all-ones where the add did NOT carry,
        // i.e. where b <= a+b when compared as unsigned values.
        if (cpu_info.has(Xbyak::util::Cpu::tSSE41)) {
            // PMINUW is an SSE4.1 instruction: min(a+b, b) == b  <=>  b <= a+b.
            code->movdqa(tmp, xmm_a);
            code->pminuw(tmp, xmm_b);
            code->pcmpeqw(tmp, xmm_b);
        } else {
            // SSE2-only fallback: the unsigned saturating difference b - (a+b)
            // is zero exactly when b <= a+b.
            Xbyak::Xmm zero = reg_alloc.ScratchXmm();

            code->movdqa(tmp, xmm_b);
            code->psubusw(tmp, xmm_a);
            code->pxor(zero, zero);
            code->pcmpeqw(tmp, zero);
        }
        code->movd(reg_ge, tmp);
        // Invert to obtain the lanes that did carry.
        code->not_(reg_ge);

        // Both bytes of each word in the mask are identical, so extracting the top bit
        // of every byte yields the duplicated per-word flag layout.
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge);
        reg_alloc.DefineValue(ge_inst, reg_ge);
    }

    reg_alloc.DefineValue(inst, xmm_a);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise wrapping add of packed signed 16-bit words.
// If a GetGEFromOp pseudo-operation is attached, it is erased from the block and
// defined as a 4-bit mask (two identical bits per word lane), set where the saturated
// signed sum of that lane is non-negative.
void EmitX64::EmitPackedAddS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm sum = reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm rhs = reg_alloc.UseXmm(args[1]);
    Xbyak::Reg32 ge_bits;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        ge_bits = reg_alloc.ScratchGpr().cvt32();
        const Xbyak::Xmm saturated = reg_alloc.ScratchXmm();

        // The saturating sum keeps the sign of the true sum; this has to be captured
        // before the first operand is overwritten by the wrapping add below.
        code->movdqa(saturated, sum);
        code->paddsw(saturated, rhs);
        code->movd(ge_bits, saturated);
    }

    code->paddw(sum, rhs);

    if (ge_inst) {
        // Inverted sign bit == "lane is non-negative".
        code->not_(ge_bits);
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, ge_bits);
        reg_alloc.DefineValue(ge_inst, ge_bits);
    }

    reg_alloc.DefineValue(inst, sum);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise wrapping subtract of packed unsigned bytes.
// If a GetGEFromOp pseudo-operation is attached, it is erased from the block and
// defined as a 4-bit mask with one bit per byte lane, set where a >= b (no borrow).
void EmitX64::EmitPackedSubU8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm difference = reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm rhs = reg_alloc.UseXmm(args[1]);
    Xbyak::Reg32 ge_bits;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        const Xbyak::Xmm no_borrow = reg_alloc.ScratchXmm();
        ge_bits = reg_alloc.ScratchGpr().cvt32();

        // a >= b (unsigned) iff max(a, b) == a. This has to be evaluated before
        // the first operand is overwritten by the subtract below.
        code->movdqa(no_borrow, difference);
        code->pmaxub(no_borrow, rhs);
        code->pcmpeqb(no_borrow, difference);
        code->movd(ge_bits, no_borrow);
    }

    code->psubb(difference, rhs);

    if (ge_inst) {
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, ge_bits);
        reg_alloc.DefineValue(ge_inst, ge_bits);
    }

    reg_alloc.DefineValue(inst, difference);
}
|
|
|
|
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise wrapping subtract of packed signed bytes.
// If a GetGEFromOp pseudo-operation is attached, it is erased from the block and
// defined as a 4-bit mask with one bit per byte lane, set where the saturated
// signed difference of that lane is non-negative.
void EmitX64::EmitPackedSubS8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm difference = reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm rhs = reg_alloc.UseXmm(args[1]);
    Xbyak::Reg32 ge_bits;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        const Xbyak::Xmm saturated = reg_alloc.ScratchXmm();
        ge_bits = reg_alloc.ScratchGpr().cvt32();

        // The saturating difference keeps the sign of the true difference; this has to
        // be captured before the first operand is overwritten by the subtract below.
        code->movdqa(saturated, difference);
        code->psubsb(saturated, rhs);
        code->movd(ge_bits, saturated);
    }

    code->psubb(difference, rhs);

    if (ge_inst) {
        // Inverted sign bit == "lane is non-negative".
        code->not_(ge_bits);
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, ge_bits);
        reg_alloc.DefineValue(ge_inst, ge_bits);
    }

    reg_alloc.DefineValue(inst, difference);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise wrapping subtract of packed unsigned 16-bit words.
// If a GetGEFromOp pseudo-operation is attached, it is erased from the block and
// defined as a 4-bit mask (two identical bits per word lane), set where a >= b (no borrow).
void EmitX64::EmitPackedSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
    Xbyak::Reg32 reg_ge;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        reg_ge = reg_alloc.ScratchGpr().cvt32();
        Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();

        // Build a per-word mask that is all-ones where a >= b (unsigned).
        // This has to be evaluated before xmm_a is overwritten by the subtract below.
        if (cpu_info.has(Xbyak::util::Cpu::tSSE41)) {
            // PMAXUW is an SSE4.1 instruction: max(a, b) == a  <=>  a >= b.
            code->movdqa(xmm_ge, xmm_a);
            code->pmaxuw(xmm_ge, xmm_b);
            code->pcmpeqw(xmm_ge, xmm_a);
        } else {
            // SSE2-only fallback: the unsigned saturating difference b - a
            // is zero exactly when a >= b.
            Xbyak::Xmm zero = reg_alloc.ScratchXmm();

            code->movdqa(xmm_ge, xmm_b);
            code->psubusw(xmm_ge, xmm_a);
            code->pxor(zero, zero);
            code->pcmpeqw(xmm_ge, zero);
        }
        code->movd(reg_ge, xmm_ge);
    }

    code->psubw(xmm_a, xmm_b);

    if (ge_inst) {
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge);
        reg_alloc.DefineValue(ge_inst, reg_ge);
    }

    reg_alloc.DefineValue(inst, xmm_a);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise wrapping subtract of packed signed 16-bit words.
// If a GetGEFromOp pseudo-operation is attached, it is erased from the block and
// defined as a 4-bit mask (two identical bits per word lane), set where the saturated
// signed difference of that lane is non-negative.
void EmitX64::EmitPackedSubS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm difference = reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm rhs = reg_alloc.UseXmm(args[1]);
    Xbyak::Reg32 ge_bits;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        const Xbyak::Xmm saturated = reg_alloc.ScratchXmm();
        ge_bits = reg_alloc.ScratchGpr().cvt32();

        // The saturating difference keeps the sign of the true difference; this has to
        // be captured before the first operand is overwritten by the subtract below.
        code->movdqa(saturated, difference);
        code->psubsw(saturated, rhs);
        code->movd(ge_bits, saturated);
    }

    code->psubw(difference, rhs);

    if (ge_inst) {
        // Inverted sign bit == "lane is non-negative".
        code->not_(ge_bits);
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, ge_bits);
        reg_alloc.DefineValue(ge_inst, ge_bits);
    }

    reg_alloc.DefineValue(inst, difference);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise halving add of packed unsigned bytes: each result byte is (a + b) >> 1,
// computed without losing the carry out of the 8-bit addition.
void EmitX64::EmitPackedHalvingAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    // This code path requires SSSE3 because of the PSHUFB instruction.
    // A fallback implementation is provided below.
    if (cpu_info.has(Xbyak::util::Cpu::tSSSE3)) {
        Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
        Xbyak::Xmm xmm_b = reg_alloc.UseScratchXmm(args[1]);
        Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm();
        Xbyak::Reg64 mask = reg_alloc.ScratchGpr();

        // Set the mask to expand the values
        // 0xAABBCCDD becomes 0x00AA00BB00CC00DD
        code->mov(mask, 0x8003800280018000);
        code->movq(xmm_mask, mask);

        // Expand each 8-bit value to 16-bit
        code->pshufb(xmm_a, xmm_mask);
        code->pshufb(xmm_b, xmm_mask);

        // Add the individual 16-bit values
        code->paddw(xmm_a, xmm_b);

        // Shift the 16-bit values to the right to halve them
        code->psrlw(xmm_a, 1);

        // Set the mask to pack the values again
        // 0x00AA00BB00CC00DD becomes 0xAABBCCDD
        code->mov(mask, 0x06040200);
        code->movq(xmm_mask, mask);

        // Shuffle them back to 8-bit values
        code->pshufb(xmm_a, xmm_mask);

        reg_alloc.DefineValue(inst, xmm_a);
    } else {
        // Fallback implementation in case the CPU doesn't support SSSE3
        Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32();
        Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32();
        Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
        Xbyak::Reg32 and_a_b = reg_a;
        Xbyak::Reg32 result = reg_a;

        // This relies on the equality x+y == ((x&y) << 1) + (x^y),
        // so (x+y)/2 can be calculated as (x&y) + ((x^y)>>1).
        // We mask by 0x7F so that the LSB of one byte doesn't leak into the byte below.
        //
        // The underscore-suffixed emitter names are used because `and` and `xor`
        // are alternative operator tokens in standard C++.
        code->mov(xor_a_b, reg_a);
        code->and_(and_a_b, reg_b);
        code->xor_(xor_a_b, reg_b);
        code->shr(xor_a_b, 1);
        code->and_(xor_a_b, 0x7F7F7F7F);
        code->add(result, xor_a_b);

        reg_alloc.DefineValue(inst, result);
    }
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise halving add of packed unsigned 16-bit words: each result word is
// (a + b) >> 1, computed without losing the carry out of the 16-bit addition.
void EmitX64::EmitPackedHalvingAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32();
    Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 and_a_b = reg_a;
    Xbyak::Reg32 result = reg_a;

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
    // We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.
    //
    // The underscore-suffixed emitter names are used because `and` and `xor`
    // are alternative operator tokens in standard C++.

    code->mov(xor_a_b, reg_a);
    code->and_(and_a_b, reg_b);
    code->xor_(xor_a_b, reg_b);
    code->shr(xor_a_b, 1);
    code->and_(xor_a_b, 0x7FFF7FFF);
    code->add(result, xor_a_b);

    reg_alloc.DefineValue(inst, result);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise halving add of packed signed bytes: each result byte is the
// arithmetic (sign-preserving) half of the exact sum a + b.
void EmitX64::EmitPackedHalvingAddS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32();
    Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 and_a_b = reg_a;
    Xbyak::Reg32 result = reg_a;
    Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
    // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
    // carry propagates the sign bit from (x^y)>>1 upwards by one.
    //
    // The underscore-suffixed emitter names are used because `and` and `xor`
    // are alternative operator tokens in standard C++.

    code->mov(xor_a_b, reg_a);
    code->and_(and_a_b, reg_b);
    code->xor_(xor_a_b, reg_b);
    code->mov(carry, xor_a_b);
    code->and_(carry, 0x80808080);
    code->shr(xor_a_b, 1);
    code->and_(xor_a_b, 0x7F7F7F7F);
    code->add(result, xor_a_b);
    code->xor_(result, carry);

    reg_alloc.DefineValue(inst, result);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a lane-wise halving add of packed signed 16-bit words: each result word is the
// arithmetic (sign-preserving) half of the exact sum a + b.
void EmitX64::EmitPackedHalvingAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32();
    Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 and_a_b = reg_a;
    Xbyak::Reg32 result = reg_a;
    Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
    // We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.
    // carry propagates the sign bit from (x^y)>>1 upwards by one.
    //
    // The underscore-suffixed emitter names are used because `and` and `xor`
    // are alternative operator tokens in standard C++.

    code->mov(xor_a_b, reg_a);
    code->and_(and_a_b, reg_b);
    code->xor_(xor_a_b, reg_b);
    code->mov(carry, xor_a_b);
    code->and_(carry, 0x80008000);
    code->shr(xor_a_b, 1);
    code->and_(xor_a_b, 0x7FFF7FFF);
    code->add(result, xor_a_b);
    code->xor_(result, carry);

    reg_alloc.DefineValue(inst, result);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits x64 code for a packed unsigned halving subtract over the four 8-bit lanes
// of args[0] (minuend) and args[1] (subtrahend). The result is defined as the
// value of `inst`.
void EmitX64::EmitPackedHalvingSubU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32();

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    // The underscore-suffixed Xbyak mnemonics (xor_, and_, or_) are used because the
    // bare names are C++ alternative operator tokens.
    code->xor_(minuend, subtrahend);
    code->and_(subtrahend, minuend);
    code->shr(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b

    // We must now perform a partitioned subtraction.
    // We can do this because minuend contains 7 bit fields.
    // We use the extra bit in minuend as a bit to borrow from; we set this bit.
    // (Setting it also overwrites whatever the 32-bit shift moved in from the lane above.)
    // We invert this bit at the end as this tells us if that bit was borrowed from.
    code->or_(minuend, 0x80808080);
    code->sub(minuend, subtrahend);
    code->xor_(minuend, 0x80808080);

    // minuend now contains the desired result.
    reg_alloc.DefineValue(inst, minuend);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits x64 code for a packed signed halving subtract over the four 8-bit lanes
// of args[0] (minuend) and args[1] (subtrahend). The result is defined as the
// value of `inst`.
void EmitX64::EmitPackedHalvingSubS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32();

    Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    // The underscore-suffixed Xbyak mnemonics (xor_, and_, or_) are used because the
    // bare names are C++ alternative operator tokens.
    code->xor_(minuend, subtrahend);
    code->and_(subtrahend, minuend);
    code->mov(carry, minuend);
    code->and_(carry, 0x80808080);
    code->shr(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b
    // carry := (a^b) & 0x80808080

    // We must now perform a partitioned subtraction.
    // We can do this because minuend contains 7 bit fields.
    // We use the extra bit in minuend as a bit to borrow from; we set this bit.
    // We invert this bit at the end as this tells us if that bit was borrowed from.
    // We then sign extend the result into this bit.
    code->or_(minuend, 0x80808080);
    code->sub(minuend, subtrahend);
    code->xor_(minuend, 0x80808080);
    code->xor_(minuend, carry);

    reg_alloc.DefineValue(inst, minuend);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits x64 code for a packed unsigned halving subtract over the two 16-bit lanes
// of args[0] (minuend) and args[1] (subtrahend). The result is defined as the
// value of `inst`.
void EmitX64::EmitPackedHalvingSubU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32();

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    // The underscore-suffixed Xbyak mnemonics (xor_, and_, or_) are used because the
    // bare names are C++ alternative operator tokens.
    code->xor_(minuend, subtrahend);
    code->and_(subtrahend, minuend);
    code->shr(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b

    // We must now perform a partitioned subtraction.
    // We can do this because minuend contains 15 bit fields.
    // We use the extra bit in minuend as a bit to borrow from; we set this bit.
    // We invert this bit at the end as this tells us if that bit was borrowed from.
    code->or_(minuend, 0x80008000);
    code->sub(minuend, subtrahend);
    code->xor_(minuend, 0x80008000);

    reg_alloc.DefineValue(inst, minuend);
}
|
2016-11-26 19:27:21 +01:00
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits x64 code for a packed signed halving subtract over the two 16-bit lanes
// of args[0] (minuend) and args[1] (subtrahend). The result is defined as the
// value of `inst`.
void EmitX64::EmitPackedHalvingSubS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32();

    Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    // The underscore-suffixed Xbyak mnemonics (xor_, and_, or_) are used because the
    // bare names are C++ alternative operator tokens.
    code->xor_(minuend, subtrahend);
    code->and_(subtrahend, minuend);
    code->mov(carry, minuend);
    code->and_(carry, 0x80008000);
    code->shr(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b
    // carry := (a^b) & 0x80008000

    // We must now perform a partitioned subtraction.
    // We can do this because minuend contains 15 bit fields.
    // We use the extra bit in minuend as a bit to borrow from; we set this bit.
    // We invert this bit at the end as this tells us if that bit was borrowed from.
    // We then sign extend the result into this bit.
    code->or_(minuend, 0x80008000);
    code->sub(minuend, subtrahend);
    code->xor_(minuend, 0x80008000);
    code->xor_(minuend, carry);

    reg_alloc.DefineValue(inst, minuend);
}
|
2016-12-28 22:28:55 +01:00
|
|
|
|
2017-03-24 16:56:24 +01:00
|
|
|
// Shared emitter for the packed 16-bit add/subtract-with-exchange operations.
//   hi_is_sum  - true:  high halfword = a.hi + b.lo, low halfword  = a.lo - b.hi
//                false: low halfword  = a.lo + b.hi, high halfword = a.hi - b.lo
//   is_signed  - halfwords are sign-extended (true) or zero-extended (false) to
//                32 bits before the arithmetic is performed.
//   is_halving - each intermediate result is shifted right by one before packing.
// If `inst` has an associated GetGEFromOp pseudo-operation, that instruction is
// erased from `block` and its value (four GE flags in bits 3:0) is defined here.
// The packed 32-bit result is defined as the value of `inst`.
void EmitPackedSubAdd(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    Xbyak::Reg32 reg_a_hi = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 reg_b_hi = reg_alloc.UseScratchGpr(args[1]).cvt32();
    Xbyak::Reg32 reg_a_lo = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 reg_b_lo = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 reg_sum, reg_diff;

    // Split both operands into separately extended low and high halfwords.
    if (is_signed) {
        code->movsx(reg_a_lo, reg_a_hi.cvt16());
        code->movsx(reg_b_lo, reg_b_hi.cvt16());
        code->sar(reg_a_hi, 16);
        code->sar(reg_b_hi, 16);
    } else {
        code->movzx(reg_a_lo, reg_a_hi.cvt16());
        code->movzx(reg_b_lo, reg_b_hi.cvt16());
        code->shr(reg_a_hi, 16);
        code->shr(reg_b_hi, 16);
    }

    // Perform the exchanged add and subtract in 32 bits. The low result always
    // ends up in reg_a_lo and the high result in reg_a_hi.
    if (hi_is_sum) {
        code->sub(reg_a_lo, reg_b_hi);
        code->add(reg_a_hi, reg_b_lo);
        reg_diff = reg_a_lo;
        reg_sum = reg_a_hi;
    } else {
        code->add(reg_a_lo, reg_b_hi);
        code->sub(reg_a_hi, reg_b_lo);
        reg_diff = reg_a_hi;
        reg_sum = reg_a_lo;
    }

    if (ge_inst) {
        EraseInstruction(block, ge_inst);

        // The reg_b registers are no longer required.
        Xbyak::Reg32 ge_sum = reg_b_hi;
        Xbyak::Reg32 ge_diff = reg_b_lo;

        code->mov(ge_sum, reg_sum);
        code->mov(ge_diff, reg_diff);

        // The underscore-suffixed Xbyak mnemonics (not_, and_, or_) are used because
        // the bare names are C++ alternative operator tokens.
        if (!is_signed) {
            // Unsigned sum: move the carry-out (bit 16) to bit 31 and smear it downwards.
            code->shl(ge_sum, 15);
            code->sar(ge_sum, 16);
        } else {
            // Signed sum: after inversion the upper bits are set iff the sum is non-negative.
            code->not_(ge_sum);
        }
        // After inversion the upper bits are set iff the difference did not go negative.
        code->not_(ge_diff);
        // Keep two bits per halfword, positioned so that the final shift by 28 leaves
        // the high halfword's flags in bits 3:2 and the low halfword's in bits 1:0.
        code->and_(ge_sum, hi_is_sum ? 0xC0000000 : 0x30000000);
        code->and_(ge_diff, hi_is_sum ? 0x30000000 : 0xC0000000);
        code->or_(ge_sum, ge_diff);
        code->shr(ge_sum, 28);

        reg_alloc.DefineValue(ge_inst, ge_sum);
    }

    if (is_halving) {
        // Shift one less than for the non-halving case so that bits 16:1 of the low
        // result land in the top halfword, and drop bit 0 of the high result.
        code->shl(reg_a_lo, 15);
        code->shr(reg_a_hi, 1);
    } else {
        code->shl(reg_a_lo, 16);
    }

    // reg_a_lo now contains the low word and reg_a_hi now contains the high word.
    // Merge them.
    code->shld(reg_a_hi, reg_a_lo, 16);

    reg_alloc.DefineValue(inst, reg_a_hi);
}
|
|
|
|
|
2017-03-24 16:56:24 +01:00
|
|
|
// Unsigned, non-halving: high halfword is the sum, low halfword is the difference.
void EmitX64::EmitPackedAddSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    constexpr bool hi_is_sum = true;
    constexpr bool is_signed = false;
    constexpr bool is_halving = false;
    EmitPackedSubAdd(code, reg_alloc, block, inst, hi_is_sum, is_signed, is_halving);
}
|
|
|
|
|
|
|
|
// Signed, non-halving: high halfword is the sum, low halfword is the difference.
void EmitX64::EmitPackedAddSubS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    constexpr bool hi_is_sum = true;
    constexpr bool is_signed = true;
    constexpr bool is_halving = false;
    EmitPackedSubAdd(code, reg_alloc, block, inst, hi_is_sum, is_signed, is_halving);
}
|
|
|
|
|
|
|
|
// Unsigned, non-halving: high halfword is the difference, low halfword is the sum.
void EmitX64::EmitPackedSubAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    constexpr bool hi_is_sum = false;
    constexpr bool is_signed = false;
    constexpr bool is_halving = false;
    EmitPackedSubAdd(code, reg_alloc, block, inst, hi_is_sum, is_signed, is_halving);
}
|
|
|
|
|
|
|
|
// Signed, non-halving: high halfword is the difference, low halfword is the sum.
void EmitX64::EmitPackedSubAddS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    constexpr bool hi_is_sum = false;
    constexpr bool is_signed = true;
    constexpr bool is_halving = false;
    EmitPackedSubAdd(code, reg_alloc, block, inst, hi_is_sum, is_signed, is_halving);
}
|
|
|
|
|
|
|
|
// Unsigned, halving: high halfword is the sum, low halfword is the difference.
void EmitX64::EmitPackedHalvingAddSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    constexpr bool hi_is_sum = true;
    constexpr bool is_signed = false;
    constexpr bool is_halving = true;
    EmitPackedSubAdd(code, reg_alloc, block, inst, hi_is_sum, is_signed, is_halving);
}
|
|
|
|
|
|
|
|
// Signed, halving: high halfword is the sum, low halfword is the difference.
void EmitX64::EmitPackedHalvingAddSubS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    constexpr bool hi_is_sum = true;
    constexpr bool is_signed = true;
    constexpr bool is_halving = true;
    EmitPackedSubAdd(code, reg_alloc, block, inst, hi_is_sum, is_signed, is_halving);
}
|
|
|
|
|
|
|
|
// Unsigned, halving: high halfword is the difference, low halfword is the sum.
void EmitX64::EmitPackedHalvingSubAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    constexpr bool hi_is_sum = false;
    constexpr bool is_signed = false;
    constexpr bool is_halving = true;
    EmitPackedSubAdd(code, reg_alloc, block, inst, hi_is_sum, is_signed, is_halving);
}
|
2016-12-04 21:52:33 +01:00
|
|
|
|
2017-03-24 16:56:24 +01:00
|
|
|
// Signed, halving: high halfword is the difference, low halfword is the sum.
void EmitX64::EmitPackedHalvingSubAddS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    constexpr bool hi_is_sum = false;
    constexpr bool is_signed = true;
    constexpr bool is_halving = true;
    EmitPackedSubAdd(code, reg_alloc, block, inst, hi_is_sum, is_signed, is_halving);
}
|
2016-12-04 21:52:33 +01:00
|
|
|
|
2017-02-24 22:25:31 +01:00
|
|
|
// Emits a two-operand packed SIMD instruction `fn` with args[0] as the destination
// (loaded into a scratch XMM register) and args[1] as the source. The destination
// register is defined as the value of `inst`.
static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm dest = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm source = reg_alloc.UseXmm(args[1]);

    // Invoke the selected mnemonic on the code generator.
    auto& generator = *code;
    (generator.*fn)(dest, source);

    reg_alloc.DefineValue(inst, dest);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Packed saturated add, unsigned bytes: emitted as PADDUSB.
void EmitX64::EmitPackedSaturatedAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::paddusb;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Packed saturated add, signed bytes: emitted as PADDSB.
void EmitX64::EmitPackedSaturatedAddS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::paddsb;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Packed saturated subtract, unsigned bytes: emitted as PSUBUSB.
void EmitX64::EmitPackedSaturatedSubU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::psubusb;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Packed saturated subtract, signed bytes: emitted as PSUBSB.
void EmitX64::EmitPackedSaturatedSubS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::psubsb;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Packed saturated add, unsigned halfwords: emitted as PADDUSW.
void EmitX64::EmitPackedSaturatedAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::paddusw;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Packed saturated add, signed halfwords: emitted as PADDSW.
void EmitX64::EmitPackedSaturatedAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::paddsw;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Packed saturated subtract, unsigned halfwords: emitted as PSUBUSW.
void EmitX64::EmitPackedSaturatedSubU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::psubusw;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Packed saturated subtract, signed halfwords: emitted as PSUBSW.
void EmitX64::EmitPackedSaturatedSubS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::psubsw;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Sum of absolute byte differences: emitted as PSADBW.
// NOTE(review): PSADBW operates on unsigned bytes, while this emitter's name ends
// in S8 - confirm that the naming matches the IR opcode's intended semantics.
void EmitX64::EmitPackedAbsDiffSumS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp packed_op = &Xbyak::CodeGenerator::psadbw;
    EmitPackedOperation(code, reg_alloc, inst, packed_op);
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// If the 32-bit value in xmm_value has a magnitude bit pattern in the range
// [1, 0x007FFFFF] (a denormal input), zeroes xmm_value and stores 1 << 7 to
// JitState::FPSCR_IDC. gpr_scratch is clobbered.
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
    using namespace Xbyak::util;

    // We need to report back whether we've found a denormal on input.
    // SSE doesn't do this for us when SSE's DAZ is enabled.

    constexpr u32 magnitude_mask = 0x7FFFFFFF;
    constexpr u32 penultimate_denormal = 0x007FFFFE;
    constexpr u32 idc_flag = 1 << 7;

    Xbyak::Label not_denormal;

    code->movd(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, magnitude_mask);
    // Subtracting one wraps zero around to 0xFFFFFFFF, so a single unsigned
    // comparison excludes both zero and everything above the denormal range.
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, penultimate_denormal);
    code->ja(not_denormal);
    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_IDC)], idc_flag);
    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
// 64-bit counterpart of the denormal-input check: if the magnitude of xmm_value,
// minus one, is not above f64_penultimate_positive_denormal (unsigned compare),
// zeroes xmm_value and stores 1 << 7 to JitState::FPSCR_IDC. gpr_scratch is clobbered.
static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
    using namespace Xbyak::util;

    constexpr u32 idc_flag = 1 << 7;

    Xbyak::Label not_denormal;

    // Both constants are used as 64-bit memory operands.
    auto magnitude_mask = code->MConst(f64_non_sign_mask);
    magnitude_mask.setBit(64);
    auto upper_bound = code->MConst(f64_penultimate_positive_denormal);
    upper_bound.setBit(64);

    code->movq(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, magnitude_mask);
    // Subtracting one wraps zero around to the maximum value, so zero is excluded.
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, upper_bound);
    code->ja(not_denormal);
    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_IDC)], idc_flag);
    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
// If the 32-bit value in xmm_value has a magnitude bit pattern in the range
// [1, 0x007FFFFF] (a denormal result), zeroes xmm_value and stores 1 << 3 to
// JitState::FPSCR_UFC. gpr_scratch is clobbered.
static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
    using namespace Xbyak::util;

    constexpr u32 magnitude_mask = 0x7FFFFFFF;
    constexpr u32 penultimate_denormal = 0x007FFFFE;
    constexpr u32 ufc_flag = 1 << 3;

    Xbyak::Label not_denormal;

    code->movd(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, magnitude_mask);
    // Subtracting one wraps zero around to 0xFFFFFFFF, so a single unsigned
    // comparison excludes both zero and everything above the denormal range.
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, penultimate_denormal);
    code->ja(not_denormal);
    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_UFC)], ufc_flag);
    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
// 64-bit counterpart of the denormal-result flush: if the magnitude of xmm_value,
// minus one, is not above f64_penultimate_positive_denormal (unsigned compare),
// zeroes xmm_value and stores 1 << 3 to JitState::FPSCR_UFC. gpr_scratch is clobbered.
static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
    using namespace Xbyak::util;

    constexpr u32 ufc_flag = 1 << 3;

    Xbyak::Label not_denormal;

    // Both constants are used as 64-bit memory operands.
    auto magnitude_mask = code->MConst(f64_non_sign_mask);
    magnitude_mask.setBit(64);
    auto upper_bound = code->MConst(f64_penultimate_positive_denormal);
    upper_bound.setBit(64);

    code->movq(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, magnitude_mask);
    // Subtracting one wraps zero around to the maximum value, so zero is excluded.
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, upper_bound);
    code->ja(not_denormal);
    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_UFC)], ufc_flag);
    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
// Replaces a single-precision NaN in xmm_value with the constant f32_nan;
// any other value is left untouched.
static void DefaultNaN32(BlockOfCode* code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // Comparing a value with itself is unordered (parity flag set) only for NaN.
    code->ucomiss(xmm_value, xmm_value);
    code->jnp(not_nan);

    const auto default_nan = code->MConst(f32_nan);
    code->movaps(xmm_value, default_nan);

    code->L(not_nan);
}
|
|
|
|
|
|
|
|
// Replaces a double-precision NaN in xmm_value with the constant f64_nan;
// any other value is left untouched.
static void DefaultNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // Comparing a value with itself is unordered (parity flag set) only for NaN.
    code->ucomisd(xmm_value, xmm_value);
    code->jnp(not_nan);

    const auto default_nan = code->MConst(f64_nan);
    code->movaps(xmm_value, default_nan);

    code->L(not_nan);
}
|
|
|
|
|
2016-08-26 16:23:08 +02:00
|
|
|
// Zeroes the double in xmm_value if it is a NaN, using a branch-free mask.
// xmm_scratch is clobbered.
static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
    const Xbyak::Xmm& ordered_mask = xmm_scratch;

    // Start from 0.0, then compare it against the value: the low 64 bits of the
    // mask become all-ones when the pair is ordered (i.e. the value is not a NaN).
    code->pxor(ordered_mask, ordered_mask);
    code->cmpordsd(ordered_mask, xmm_value);

    // Keep the value where the mask is set; a NaN is ANDed with zero.
    code->pand(xmm_value, ordered_mask);
}
|
|
|
|
|
|
|
|
// Emits a single-precision binary operation `fn` computing args[0] op args[1].
// When the block's FPSCR has FTZ set, denormal inputs are zeroed beforehand and a
// denormal result is flushed afterwards; when DN is set, a NaN result is replaced
// with the default NaN. The result is defined as the value of `inst`.
static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm lhs = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm rhs = reg_alloc.UseScratchXmm(args[1]);
    Xbyak::Reg32 scratch = reg_alloc.ScratchGpr().cvt32();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero32(code, lhs, scratch);
        DenormalsAreZero32(code, rhs, scratch);
    }

    // The result is accumulated into lhs.
    (code->*fn)(lhs, rhs);

    if (flush_to_zero) {
        FlushToZero32(code, lhs, scratch);
    }
    if (default_nan) {
        DefaultNaN32(code, lhs);
    }

    reg_alloc.DefineValue(inst, lhs);
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits a double-precision binary operation `fn` computing args[0] op args[1].
// When the block's FPSCR has FTZ set, denormal inputs are zeroed beforehand and a
// denormal result is flushed afterwards; when DN is set, a NaN result is replaced
// with the default NaN. The result is defined as the value of `inst`.
static void FPThreeOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm lhs = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm rhs = reg_alloc.UseScratchXmm(args[1]);
    Xbyak::Reg64 scratch = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero64(code, lhs, scratch);
        DenormalsAreZero64(code, rhs, scratch);
    }

    // The result is accumulated into lhs.
    (code->*fn)(lhs, rhs);

    if (flush_to_zero) {
        FlushToZero64(code, lhs, scratch);
    }
    if (default_nan) {
        DefaultNaN64(code, lhs);
    }

    reg_alloc.DefineValue(inst, lhs);
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits a single-precision unary operation `fn` applied in place to args[0].
// When the block's FPSCR has FTZ set, a denormal input is zeroed beforehand and a
// denormal result is flushed afterwards; when DN is set, a NaN result is replaced
// with the default NaN. The result is defined as the value of `inst`.
static void FPTwoOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 scratch = reg_alloc.ScratchGpr().cvt32();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero32(code, value, scratch);
    }

    // Source and destination are the same register.
    (code->*fn)(value, value);

    if (flush_to_zero) {
        FlushToZero32(code, value, scratch);
    }
    if (default_nan) {
        DefaultNaN32(code, value);
    }

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits a double-precision unary operation `fn` applied in place to args[0].
// When the block's FPSCR has FTZ set, a denormal input is zeroed beforehand and a
// denormal result is flushed afterwards; when DN is set, a NaN result is replaced
// with the default NaN. The result is defined as the value of `inst`.
static void FPTwoOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg64 scratch = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero64(code, value, scratch);
    }

    // Source and destination are the same register.
    (code->*fn)(value, value);

    if (flush_to_zero) {
        FlushToZero64(code, value, scratch);
    }
    if (default_nan) {
        DefaultNaN64(code, value);
    }

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// No instructions are emitted here: the value of `inst` is defined directly
// from its first argument.
void EmitX64::EmitTransferFromFP32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& source = args[0];
    reg_alloc.DefineValue(inst, source);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// No instructions are emitted here: the value of `inst` is defined directly
// from its first argument.
void EmitX64::EmitTransferFromFP64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& source = args[0];
    reg_alloc.DefineValue(inst, source);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Defines the value of `inst` from its first argument. An immediate zero is
// special-cased: it is materialised by XORing a scratch XMM register with itself.
void EmitX64::EmitTransferToFP32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& source = args[0];

    const bool is_zero_immediate = source.IsImmediate() && source.GetImmediateU32() == 0;
    if (!is_zero_immediate) {
        reg_alloc.DefineValue(inst, source);
        return;
    }

    Xbyak::Xmm zero = reg_alloc.ScratchXmm();
    code->xorps(zero, zero);
    reg_alloc.DefineValue(inst, zero);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Defines the value of `inst` from its first argument. An immediate zero is
// special-cased: it is materialised by XORing a scratch XMM register with itself.
void EmitX64::EmitTransferToFP64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto& source = args[0];

    const bool is_zero_immediate = source.IsImmediate() && source.GetImmediateU64() == 0;
    if (!is_zero_immediate) {
        reg_alloc.DefineValue(inst, source);
        return;
    }

    Xbyak::Xmm zero = reg_alloc.ScratchXmm();
    code->xorps(zero, zero);
    reg_alloc.DefineValue(inst, zero);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Single-precision absolute value: ANDs the operand with f32_non_sign_mask,
// which clears the sign bit.
void EmitX64::EmitFPAbs32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);

    const auto non_sign_mask = code->MConst(f32_non_sign_mask);
    code->pand(value, non_sign_mask);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Double-precision absolute value: ANDs the operand with f64_non_sign_mask,
// which clears the sign bit.
void EmitX64::EmitFPAbs64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);

    const auto non_sign_mask = code->MConst(f64_non_sign_mask);
    code->pand(value, non_sign_mask);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Single-precision negation: XORs the operand with f32_negative_zero,
// which flips the sign bit.
void EmitX64::EmitFPNeg32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);

    const auto sign_bit = code->MConst(f32_negative_zero);
    code->pxor(value, sign_bit);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Double-precision negation: XORs the operand with f64_negative_zero,
// which flips the sign bit.
void EmitX64::EmitFPNeg64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);

    const auto sign_bit = code->MConst(f64_negative_zero);
    code->pxor(value, sign_bit);

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Single-precision add: ADDSS via the shared three-operand helper.
void EmitX64::EmitFPAdd32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::addss;
    FPThreeOp32(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Double-precision add: ADDSD via the shared three-operand helper.
void EmitX64::EmitFPAdd64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::addsd;
    FPThreeOp64(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Single-precision divide: DIVSS via the shared three-operand helper.
void EmitX64::EmitFPDiv32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::divss;
    FPThreeOp32(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Double-precision divide: DIVSD via the shared three-operand helper.
void EmitX64::EmitFPDiv64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::divsd;
    FPThreeOp64(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Single-precision multiply: MULSS via the shared three-operand helper.
void EmitX64::EmitFPMul32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::mulss;
    FPThreeOp32(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Double-precision multiply: MULSD via the shared three-operand helper.
void EmitX64::EmitFPMul64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::mulsd;
    FPThreeOp64(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Single-precision square root: SQRTSS via the shared two-operand helper.
void EmitX64::EmitFPSqrt32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::sqrtss;
    FPTwoOp32(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Double-precision square root: SQRTSD via the shared two-operand helper.
void EmitX64::EmitFPSqrt64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::sqrtsd;
    FPTwoOp64(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Single-precision subtract: SUBSS via the shared three-operand helper.
void EmitX64::EmitFPSub32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::subss;
    FPThreeOp32(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Double-precision subtract: SUBSD via the shared three-operand helper.
void EmitX64::EmitFPSub64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp scalar_op = &Xbyak::CodeGenerator::subsd;
    FPThreeOp64(code, reg_alloc, block, inst, scalar_op);
}
|
|
|
|
|
2016-12-04 12:43:31 +01:00
|
|
|
// Translates the host flags left by a preceding (u)comiss/(u)comisd into a guest
// NZCV value and stores it to JitState::FPSCR_nzcv. The flags are captured with
// LAHF and AH is compared against each of the four possible comparison outcomes;
// the value paired with the matching pattern is selected with CMOVE.
static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) {
    using namespace Xbyak::util;

    reg_alloc.ScratchGpr({HostLoc::RAX}); // lahf requires use of ah
    Xbyak::Reg32 candidate = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 selected = reg_alloc.ScratchGpr().cvt32();

    // AH after LAHF is SF:ZF:0:AF:0:PF:1:CF.
    struct Outcome {
        u32 nzcv;       // value stored to FPSCR_nzcv when the pattern matches
        u32 ah_pattern; // expected contents of AH
    };
    constexpr Outcome outcomes[] = {
        {0x30000000, 0b01000111}, // ZF, PF and CF set
        {0x20000000, 0b00000010}, // no flags set
        {0x80000000, 0b00000011}, // CF set
        {0x60000000, 0b01000010}, // ZF set
    };

    code->lahf();
    for (const Outcome& outcome : outcomes) {
        code->mov(candidate, outcome.nzcv);
        code->cmp(ah, outcome.ah_pattern);
        code->cmove(selected, candidate);
    }
    code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], selected);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Compares two single-precision values and updates the guest FPSCR NZCV flags.
// args[2] is an immediate selecting UCOMISS (true) or COMISS (false).
void EmitX64::EmitFPCompare32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm lhs = reg_alloc.UseXmm(args[0]);
    Xbyak::Xmm rhs = reg_alloc.UseXmm(args[1]);
    const bool is_quiet = args[2].GetImmediateU1();

    if (!is_quiet) {
        code->comiss(lhs, rhs);
    } else {
        code->ucomiss(lhs, rhs);
    }

    // Must directly follow the compare: it reads the host flags set above.
    SetFpscrNzcvFromFlags(code, reg_alloc);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Compares two double-precision values and updates the guest FPSCR NZCV flags.
// args[2] is an immediate selecting UCOMISD (true) or COMISD (false).
void EmitX64::EmitFPCompare64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm lhs = reg_alloc.UseXmm(args[0]);
    Xbyak::Xmm rhs = reg_alloc.UseXmm(args[1]);
    const bool is_quiet = args[2].GetImmediateU1();

    if (!is_quiet) {
        code->comisd(lhs, rhs);
    } else {
        code->ucomisd(lhs, rhs);
    }

    // Must directly follow the compare: it reads the host flags set above.
    SetFpscrNzcvFromFlags(code, reg_alloc);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a single -> double precision conversion (CVTSS2SD) performed in place
// on a scratch copy of args[0].
// The block's FPSCR selects the optional fix-ups around the conversion:
//   FTZ - DenormalsAreZero32 on the input and FlushToZero64 on the output
//   DN  - DefaultNaN64 on the output
// NOTE(review): the exact behaviour of those helpers is defined elsewhere in
// this file and is only inferred here from their names.
void EmitX64::EmitFPSingleToDouble(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg64 scratch = reg_alloc.ScratchGpr();

    if (flush_to_zero) {
        DenormalsAreZero32(code, value, scratch.cvt32());
    }

    code->cvtss2sd(value, value);

    if (flush_to_zero) {
        FlushToZero64(code, value, scratch);
    }
    if (default_nan) {
        DefaultNaN64(code, value);
    }

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a double -> single precision conversion (CVTSD2SS) performed in place
// on a scratch copy of args[0].
// The block's FPSCR selects the optional fix-ups around the conversion:
//   FTZ - DenormalsAreZero64 on the input and FlushToZero32 on the output
//   DN  - DefaultNaN32 on the output
// NOTE(review): the exact behaviour of those helpers is defined elsewhere in
// this file and is only inferred here from their names.
void EmitX64::EmitFPDoubleToSingle(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg64 scratch = reg_alloc.ScratchGpr();

    if (flush_to_zero) {
        DenormalsAreZero64(code, value, scratch);
    }

    code->cvtsd2ss(value, value);

    if (flush_to_zero) {
        FlushToZero32(code, value, scratch.cvt32());
    }
    if (default_nan) {
        DefaultNaN32(code, value);
    }

    reg_alloc.DefineValue(inst, value);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a single-precision float -> signed 32-bit integer conversion.
//
// ARM saturates out-of-range inputs whereas x64 returns a sentinel value, so
// the input is widened to double (lossless), clamped to the s32 range and only
// then converted. args[1] is an immediate flag selecting the truncating
// instruction (CVTTSD2SI) instead of the one that uses the current rounding
// mode (CVTSD2SI).
void EmitX64::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 dest = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_scratch = reg_alloc.ScratchXmm();
    const bool truncate = args[1].GetImmediateU1();

    // Emits the double -> 32-bit GPR conversion selected by `truncate`.
    const auto emit_convert = [&] {
        if (truncate) {
            code->cvttsd2si(dest, value);
        } else {
            code->cvtsd2si(dest, value);
        }
    };

    if (block.Location().FPSCR().FTZ()) {
        // dest is not live yet, so it doubles as the helper's GPR scratch.
        DenormalsAreZero32(code, value, dest);
    }
    code->cvtss2sd(value, value);

    // First conversion: performed on the unclamped value only to set flags.
    emit_convert();

    // Clamp to the representable output range.
    ZeroIfNaN64(code, value, nan_scratch);
    code->minsd(value, code->MConst(f64_max_s32));
    code->maxsd(value, code->MConst(f64_min_s32));

    // Second conversion: produces the actual result.
    emit_convert();

    reg_alloc.DefineValue(inst, dest);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a single-precision float -> unsigned 32-bit integer conversion.
//
// ARM saturates out-of-range inputs whereas x64 returns a sentinel value. The
// input is widened to double, which is lossless and allows accurate clamping.
// SSE2 has no unsigned conversion, so the value is shifted into the signed
// range, converted, and the integer result is shifted back.
//
// args[1] is an immediate flag forcing truncation; truncation is also used when
// the block's FPSCR rounding mode is TowardsZero.
//
// FIXME: Inexact exception not correctly signalled with the below code
void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 dest = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_scratch = reg_alloc.ScratchXmm();
    const bool force_truncate = args[1].GetImmediateU1();

    const bool flush_denormals = block.Location().FPSCR().FTZ();
    const bool truncating = force_truncate
        || block.Location().FPSCR().RMode() == Arm::FPSCR::RoundingMode::TowardsZero;

    if (truncating) {
        Xbyak::Xmm range_mask = reg_alloc.ScratchXmm();
        Xbyak::Reg32 result_bias = reg_alloc.ScratchGpr().cvt32();

        if (flush_denormals) {
            DenormalsAreZero32(code, value, dest);
        }
        code->cvtss2sd(value, value);
        ZeroIfNaN64(code, value, nan_scratch);

        // Build masks that are non-zero only when the value exceeds the signed range.
        code->movaps(range_mask, code->MConst(f64_max_s32));
        code->cmpltsd(range_mask, value);
        code->movd(result_bias, range_mask);
        code->pand(range_mask, code->MConst(f64_min_s32));
        code->and_(result_bias, u32(2147483648u));

        // Shift into the signed range if necessary.
        code->addsd(value, range_mask);

        // First conversion only sets flags.
        code->cvttsd2si(dest, value);

        // Clamp to the output range.
        code->minsd(value, code->MConst(f64_max_s32));
        code->maxsd(value, code->MConst(f64_min_u32));

        // Real conversion, then undo the shift if one was applied.
        code->cvttsd2si(dest, value);
        code->add(dest, result_bias);
    } else {
        if (flush_denormals) {
            DenormalsAreZero32(code, value, dest);
        }
        code->cvtss2sd(value, value);
        ZeroIfNaN64(code, value, nan_scratch);

        // Shift into the range SSE can convert.
        code->addsd(value, code->MConst(f64_min_s32));

        // First conversion only sets flags.
        code->cvtsd2si(dest, value);

        // Clamp to the output range.
        code->minsd(value, code->MConst(f64_max_s32));
        code->maxsd(value, code->MConst(f64_min_s32));

        // Real conversion, then shift back into the unsigned range.
        code->cvtsd2si(dest, value);
        code->add(dest, u32(2147483648u));
    }

    reg_alloc.DefineValue(inst, dest);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a double-precision float -> signed 32-bit integer conversion.
//
// ARM saturates out-of-range inputs whereas x64 returns a sentinel value, so
// the value is clamped to the s32 range before the conversion whose result is
// kept. args[1] is an immediate flag selecting the truncating instruction
// (CVTTSD2SI) instead of the one using the current rounding mode (CVTSD2SI).
void EmitX64::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 dest = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_scratch = reg_alloc.ScratchXmm();
    Xbyak::Reg32 discard = reg_alloc.ScratchGpr().cvt32();
    const bool truncate = args[1].GetImmediateU1();

    // Emits the double -> 32-bit GPR conversion selected by `truncate`.
    const auto emit_convert = [&](const Xbyak::Reg32& target) {
        if (truncate) {
            code->cvttsd2si(target, value);
        } else {
            code->cvtsd2si(target, value);
        }
    };

    if (block.Location().FPSCR().FTZ()) {
        DenormalsAreZero64(code, value, discard.cvt64());
    }

    // First conversion: unclamped, only to set flags; its result is thrown away.
    emit_convert(discard);

    // Clamp to the representable output range.
    ZeroIfNaN64(code, value, nan_scratch);
    code->minsd(value, code->MConst(f64_max_s32));
    code->maxsd(value, code->MConst(f64_min_s32));

    // Second conversion: produces the actual result.
    emit_convert(dest);

    reg_alloc.DefineValue(inst, dest);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a double-precision float -> unsigned 32-bit integer conversion.
//
// ARM saturates out-of-range inputs whereas x64 returns a sentinel value. The
// value is shifted into the signed range, clamped, converted with the signed
// instruction, and the integer result is shifted back.
//
// args[1] is an immediate flag forcing truncation; truncation is also used when
// the block's FPSCR rounding mode is TowardsZero.
//
// TODO: Use VCVTPD2UDQ when AVX512VL is available.
// FIXME: Inexact exception not correctly signalled with the below code
void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm value = reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 dest = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_scratch = reg_alloc.ScratchXmm();
    Xbyak::Reg32 discard = reg_alloc.ScratchGpr().cvt32();
    const bool force_truncate = args[1].GetImmediateU1();

    const bool flush_denormals = block.Location().FPSCR().FTZ();
    const bool truncating = force_truncate
        || block.Location().FPSCR().RMode() == Arm::FPSCR::RoundingMode::TowardsZero;

    if (truncating) {
        Xbyak::Xmm range_mask = reg_alloc.ScratchXmm();
        Xbyak::Reg32 result_bias = reg_alloc.ScratchGpr().cvt32();

        if (flush_denormals) {
            DenormalsAreZero64(code, value, discard.cvt64());
        }
        ZeroIfNaN64(code, value, nan_scratch);

        // Build masks that are non-zero only when the value exceeds the signed range.
        code->movaps(range_mask, code->MConst(f64_max_s32));
        code->cmpltsd(range_mask, value);
        code->movd(result_bias, range_mask);
        code->pand(range_mask, code->MConst(f64_min_s32));
        code->and_(result_bias, u32(2147483648u));

        // Shift into the signed range if necessary.
        code->addsd(value, range_mask);

        // First conversion only sets flags; its result is thrown away.
        code->cvttsd2si(discard, value);

        // Clamp to the output range.
        code->minsd(value, code->MConst(f64_max_s32));
        code->maxsd(value, code->MConst(f64_min_u32));

        // Real conversion, then undo the shift if one was applied.
        code->cvttsd2si(dest, value);
        code->add(dest, result_bias);
    } else {
        if (flush_denormals) {
            DenormalsAreZero64(code, value, discard.cvt64());
        }
        ZeroIfNaN64(code, value, nan_scratch);

        // Shift into the range SSE can convert.
        code->addsd(value, code->MConst(f64_min_s32));

        // First conversion only sets flags; its result is thrown away.
        code->cvtsd2si(discard, value);

        // Clamp to the output range.
        code->minsd(value, code->MConst(f64_max_s32));
        code->maxsd(value, code->MConst(f64_min_s32));

        // Real conversion, then shift back into the unsigned range.
        code->cvtsd2si(dest, value);
        code->add(dest, u32(2147483648u));
    }

    reg_alloc.DefineValue(inst, dest);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a signed 32-bit integer -> single-precision float conversion (CVTSI2SS).
// args[1] is an immediate round-to-nearest flag; setting it is not supported
// and trips the assertion below.
void EmitX64::EmitFPS32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Reg32 source = reg_alloc.UseGpr(args[0]).cvt32();
    Xbyak::Xmm converted = reg_alloc.ScratchXmm();
    const bool round_to_nearest = args[1].GetImmediateU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    code->cvtsi2ss(converted, source);

    reg_alloc.DefineValue(inst, converted);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits an unsigned 32-bit integer -> single-precision float conversion.
// args[1] is an immediate round-to-nearest flag; setting it is not supported
// and trips the assertion below.
void EmitX64::EmitFPU32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Reg64 source = reg_alloc.UseGpr(args[0]);
    Xbyak::Xmm converted = reg_alloc.ScratchXmm();
    const bool round_to_nearest = args[1].GetImmediateU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    // The conversion is done from the full 64-bit register so that the input is
    // never interpreted as a negative 32-bit value. The 32-bit self-move clears
    // the upper half of the register first.
    const Xbyak::Reg32 source_low = source.cvt32();
    code->mov(source_low, source_low); // TODO: Verify if this is necessary
    code->cvtsi2ss(converted, source);

    reg_alloc.DefineValue(inst, converted);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a signed 32-bit integer -> double-precision float conversion (CVTSI2SD).
// args[1] is an immediate round-to-nearest flag; setting it is not supported
// and trips the assertion below.
void EmitX64::EmitFPS32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Reg32 source = reg_alloc.UseGpr(args[0]).cvt32();
    Xbyak::Xmm converted = reg_alloc.ScratchXmm();
    const bool round_to_nearest = args[1].GetImmediateU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    code->cvtsi2sd(converted, source);

    reg_alloc.DefineValue(inst, converted);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits an unsigned 32-bit integer -> double-precision float conversion.
// args[1] is an immediate round-to-nearest flag; setting it is not supported
// and trips the assertion below.
void EmitX64::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    Xbyak::Reg64 source = reg_alloc.UseGpr(args[0]);
    Xbyak::Xmm converted = reg_alloc.ScratchXmm();
    const bool round_to_nearest = args[1].GetImmediateU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    // The conversion is done from the full 64-bit register so that the input is
    // never interpreted as a negative 32-bit value. The 32-bit self-move clears
    // the upper half of the register first.
    const Xbyak::Reg32 source_low = source.cvt32();
    code->mov(source_low, source_low); // TODO: Verify if this is necessary
    code->cvtsi2sd(converted, source);

    reg_alloc.DefineValue(inst, converted);
}
|
|
|
|
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a byte store of 0 to JitState::exclusive_state, addressed relative to
// r15. No IR arguments or host registers are needed.
void EmitX64::EmitClearExclusive(RegAlloc&, IR::Block&, IR::Inst*) {
    using namespace Xbyak::util;

    const auto state_flag = code->byte[r15 + offsetof(JitState, exclusive_state)];
    code->mov(state_flag, u8(0));
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits the code that marks an exclusive access as started: a byte store of 1
// to JitState::exclusive_state followed by a store of the 32-bit address from
// args[0] to JitState::exclusive_address.
// args[1] must be an immediate; it is only checked here, not used.
void EmitX64::EmitSetExclusive(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    auto args = reg_alloc.GetArgumentInfo(inst);
    ASSERT(args[1].IsImmediate());
    Xbyak::Reg32 guest_address = reg_alloc.UseGpr(args[0]).cvt32();

    const auto state_flag = code->byte[r15 + offsetof(JitState, exclusive_state)];
    const auto address_slot = dword[r15 + offsetof(JitState, exclusive_address)];

    code->mov(state_flag, u8(1));
    code->mov(address_slot, guest_address);
}
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
template <typename FunctionPointer>
|
|
|
|
static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
2017-02-24 22:25:31 +01:00
|
|
|
auto args = reg_alloc.GetArgumentInfo(inst);
|
|
|
|
reg_alloc.HostCall(inst, args[0]);
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
if (!cb.page_table) {
|
|
|
|
code->CallFunction(fn);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
using namespace Xbyak::util;
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2017-02-26 15:48:47 +01:00
|
|
|
Xbyak::Reg64 result = code->ABI_RETURN;
|
|
|
|
Xbyak::Reg32 vaddr = code->ABI_PARAM1.cvt32();
|
|
|
|
Xbyak::Reg64 page_index = code->ABI_PARAM3;
|
|
|
|
Xbyak::Reg64 page_offset = code->ABI_PARAM4;
|
2016-09-01 01:06:40 +02:00
|
|
|
|
|
|
|
Xbyak::Label abort, end;
|
|
|
|
|
2017-02-24 22:25:31 +01:00
|
|
|
code->mov(result, reinterpret_cast<u64>(cb.page_table));
|
2016-09-01 01:06:40 +02:00
|
|
|
code->mov(page_index.cvt32(), vaddr);
|
|
|
|
code->shr(page_index.cvt32(), 12);
|
2017-02-24 22:25:31 +01:00
|
|
|
code->mov(result, qword[result + page_index * 8]);
|
|
|
|
code->test(result, result);
|
2016-09-01 01:06:40 +02:00
|
|
|
code->jz(abort);
|
|
|
|
code->mov(page_offset.cvt32(), vaddr);
|
|
|
|
code->and_(page_offset.cvt32(), 4095);
|
|
|
|
switch (bit_size) {
|
|
|
|
case 8:
|
2017-02-24 22:25:31 +01:00
|
|
|
code->movzx(result, code->byte[result + page_offset]);
|
2016-09-01 01:06:40 +02:00
|
|
|
break;
|
|
|
|
case 16:
|
2017-02-24 22:25:31 +01:00
|
|
|
code->movzx(result, word[result + page_offset]);
|
2016-09-01 01:06:40 +02:00
|
|
|
break;
|
|
|
|
case 32:
|
2017-02-24 22:25:31 +01:00
|
|
|
code->mov(result.cvt32(), dword[result + page_offset]);
|
2016-09-01 01:06:40 +02:00
|
|
|
break;
|
|
|
|
case 64:
|
2017-02-24 22:25:31 +01:00
|
|
|
code->mov(result.cvt64(), qword[result + page_offset]);
|
2016-09-01 01:06:40 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ASSERT_MSG(false, "Invalid bit_size");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
code->jmp(end);
|
|
|
|
code->L(abort);
|
|
|
|
code->call(code->GetMemoryReadCallback(bit_size));
|
|
|
|
code->L(end);
|
2016-07-11 23:43:53 +02:00
|
|
|
}
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
template<typename FunctionPointer>
|
|
|
|
static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
2017-02-24 22:25:31 +01:00
|
|
|
auto args = reg_alloc.GetArgumentInfo(inst);
|
|
|
|
reg_alloc.HostCall(nullptr, args[0], args[1]);
|
|
|
|
|
2016-09-01 01:06:40 +02:00
|
|
|
if (!cb.page_table) {
|
|
|
|
code->CallFunction(fn);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
using namespace Xbyak::util;
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2017-02-26 15:48:47 +01:00
|
|
|
Xbyak::Reg32 vaddr = code->ABI_PARAM1.cvt32();
|
|
|
|
Xbyak::Reg64 value = code->ABI_PARAM2;
|
|
|
|
Xbyak::Reg64 page_index = code->ABI_PARAM3;
|
|
|
|
Xbyak::Reg64 page_offset = code->ABI_PARAM4;
|
2016-09-01 01:06:40 +02:00
|
|
|
|
|
|
|
Xbyak::Label abort, end;
|
|
|
|
|
2016-12-05 17:29:36 +01:00
|
|
|
code->mov(rax, reinterpret_cast<u64>(cb.page_table));
|
2016-09-01 01:06:40 +02:00
|
|
|
code->mov(page_index.cvt32(), vaddr);
|
|
|
|
code->shr(page_index.cvt32(), 12);
|
|
|
|
code->mov(rax, qword[rax + page_index * 8]);
|
|
|
|
code->test(rax, rax);
|
|
|
|
code->jz(abort);
|
|
|
|
code->mov(page_offset.cvt32(), vaddr);
|
|
|
|
code->and_(page_offset.cvt32(), 4095);
|
|
|
|
switch (bit_size) {
|
|
|
|
case 8:
|
|
|
|
code->mov(code->byte[rax + page_offset], value.cvt8());
|
|
|
|
break;
|
|
|
|
case 16:
|
|
|
|
code->mov(word[rax + page_offset], value.cvt16());
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
code->mov(dword[rax + page_offset], value.cvt32());
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
code->mov(qword[rax + page_offset], value.cvt64());
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ASSERT_MSG(false, "Invalid bit_size");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
code->jmp(end);
|
|
|
|
code->L(abort);
|
|
|
|
code->call(code->GetMemoryWriteCallback(bit_size));
|
|
|
|
code->L(end);
|
2016-07-11 23:43:53 +02:00
|
|
|
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits an 8-bit guest memory read through the shared ReadMemory helper, with
// cb.memory.Read8 as the callback used when no page table is available.
void EmitX64::EmitReadMemory8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 8;
    ReadMemory(code, reg_alloc, inst, cb, bit_size, cb.memory.Read8);
}
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 16-bit guest memory read through the shared ReadMemory helper, with
// cb.memory.Read16 as the callback used when no page table is available.
void EmitX64::EmitReadMemory16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 16;
    ReadMemory(code, reg_alloc, inst, cb, bit_size, cb.memory.Read16);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 32-bit guest memory read through the shared ReadMemory helper, with
// cb.memory.Read32 as the callback used when no page table is available.
void EmitX64::EmitReadMemory32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 32;
    ReadMemory(code, reg_alloc, inst, cb, bit_size, cb.memory.Read32);
}
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 64-bit guest memory read through the shared ReadMemory helper, with
// cb.memory.Read64 as the callback used when no page table is available.
void EmitX64::EmitReadMemory64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 64;
    ReadMemory(code, reg_alloc, inst, cb, bit_size, cb.memory.Read64);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits an 8-bit guest memory write through the shared WriteMemory helper, with
// cb.memory.Write8 as the callback used when no page table is available.
void EmitX64::EmitWriteMemory8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 8;
    WriteMemory(code, reg_alloc, inst, cb, bit_size, cb.memory.Write8);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 16-bit guest memory write through the shared WriteMemory helper, with
// cb.memory.Write16 as the callback used when no page table is available.
void EmitX64::EmitWriteMemory16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 16;
    WriteMemory(code, reg_alloc, inst, cb, bit_size, cb.memory.Write16);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 32-bit guest memory write through the shared WriteMemory helper, with
// cb.memory.Write32 as the callback used when no page table is available.
void EmitX64::EmitWriteMemory32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 32;
    WriteMemory(code, reg_alloc, inst, cb, bit_size, cb.memory.Write32);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 64-bit guest memory write through the shared WriteMemory helper, with
// cb.memory.Write64 as the callback used when no page table is available.
void EmitX64::EmitWriteMemory64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 64;
    WriteMemory(code, reg_alloc, inst, cb, bit_size, cb.memory.Write64);
}
|
|
|
|
|
2016-08-31 22:09:26 +02:00
|
|
|
template <typename FunctionPointer>
|
2017-02-26 15:51:12 +01:00
|
|
|
static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, FunctionPointer fn, bool prepend_high_word) {
|
2017-02-24 22:25:31 +01:00
|
|
|
auto args = reg_alloc.GetArgumentInfo(inst);
|
2017-02-26 15:51:12 +01:00
|
|
|
if (prepend_high_word) {
|
2017-02-24 22:25:31 +01:00
|
|
|
reg_alloc.HostCall(nullptr, args[0], args[1], args[2]);
|
2017-02-26 15:51:12 +01:00
|
|
|
} else {
|
2017-02-24 22:25:31 +01:00
|
|
|
reg_alloc.HostCall(nullptr, args[0], args[1]);
|
2017-02-26 15:51:12 +01:00
|
|
|
}
|
2017-02-24 22:25:31 +01:00
|
|
|
Xbyak::Reg32 passed = reg_alloc.ScratchGpr().cvt32();
|
2016-08-24 21:07:08 +02:00
|
|
|
Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers.
|
|
|
|
|
2017-02-24 22:25:31 +01:00
|
|
|
using namespace Xbyak::util;
|
|
|
|
Xbyak::Label end;
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
code->mov(passed, u32(1));
|
|
|
|
code->cmp(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
|
|
|
|
code->je(end);
|
|
|
|
code->mov(tmp, code->ABI_PARAM1);
|
|
|
|
code->xor_(tmp, dword[r15 + offsetof(JitState, exclusive_address)]);
|
|
|
|
code->test(tmp, JitState::RESERVATION_GRANULE_MASK);
|
|
|
|
code->jne(end);
|
|
|
|
code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
|
2017-02-26 15:51:12 +01:00
|
|
|
if (prepend_high_word) {
|
|
|
|
code->mov(code->ABI_PARAM2.cvt32(), code->ABI_PARAM2.cvt32()); // zero extend to 64-bits
|
|
|
|
code->shl(code->ABI_PARAM3, 32);
|
|
|
|
code->or_(code->ABI_PARAM2, code->ABI_PARAM3);
|
|
|
|
}
|
2016-08-24 21:07:08 +02:00
|
|
|
code->CallFunction(fn);
|
|
|
|
code->xor_(passed, passed);
|
|
|
|
code->L(end);
|
2017-02-24 22:25:31 +01:00
|
|
|
|
|
|
|
reg_alloc.DefineValue(inst, passed);
|
TranslateArm: Implement CLREX, LDREX, LDREXB, LDREXD, LDREXH, STREX, STREXB, STREXD, STREXH, SWP, SWPB
2016-08-09 23:48:20 +02:00
|
|
|
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits an 8-bit exclusive store via ExclusiveWrite, using cb.memory.Write8 to
// perform the write. The value is a single word, so no high word is prepended.
void EmitX64::EmitExclusiveWriteMemory8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr bool prepend_high_word = false;
    ExclusiveWrite(code, reg_alloc, inst, cb.memory.Write8, prepend_high_word);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 16-bit exclusive store via ExclusiveWrite, using cb.memory.Write16 to
// perform the write. The value is a single word, so no high word is prepended.
void EmitX64::EmitExclusiveWriteMemory16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr bool prepend_high_word = false;
    ExclusiveWrite(code, reg_alloc, inst, cb.memory.Write16, prepend_high_word);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 32-bit exclusive store via ExclusiveWrite, using cb.memory.Write32 to
// perform the write. The value is a single word, so no high word is prepended.
void EmitX64::EmitExclusiveWriteMemory32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr bool prepend_high_word = false;
    ExclusiveWrite(code, reg_alloc, inst, cb.memory.Write32, prepend_high_word);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a 64-bit exclusive store via ExclusiveWrite, using cb.memory.Write64 to
// perform the write. The value arrives as two 32-bit words, so ExclusiveWrite
// is asked to merge the high word into the low one before the call.
void EmitX64::EmitExclusiveWriteMemory64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    constexpr bool prepend_high_word = true;
    ExclusiveWrite(code, reg_alloc, inst, cb.memory.Write64, prepend_high_word);
}
|
2016-07-11 23:43:53 +02:00
|
|
|
|
2016-12-31 12:17:47 +01:00
|
|
|
// Placeholder for raising a guest coprocessor exception.
// Nothing is emitted yet: reaching this function currently trips an assertion
// at translation time.
static void EmitCoprocessorException() {
    ASSERT_MSG(false, "Should raise coproc exception here");
}
|
|
|
|
|
2017-02-24 22:25:31 +01:00
|
|
|
// Emits a host call to a coprocessor callback.
//
// Host-call parameter 1 receives the Jit interface pointer and parameter 2
// receives the callback's user argument when it has one; arg0 and arg1 (both
// optional) occupy parameters 3 and 4. `inst`, when non-null, is handed to
// HostCall as the instruction the call belongs to.
static void CallCoprocCallback(BlockOfCode* code, RegAlloc& reg_alloc, Jit* jit_interface, Coprocessor::Callback callback, IR::Inst* inst = nullptr, boost::optional<Argument&> arg0 = {}, boost::optional<Argument&> arg1 = {}) {
    // The first two parameter slots are left empty here and filled in below.
    reg_alloc.HostCall(inst, {}, {}, arg0, arg1);

    const u64 jit_ptr = reinterpret_cast<u64>(jit_interface);
    code->mov(code->ABI_PARAM1, jit_ptr);

    if (callback.user_arg) {
        const u64 user_ptr = reinterpret_cast<u64>(*callback.user_arg);
        code->mov(code->ABI_PARAM2, user_ptr);
    }

    code->CallFunction(callback.function);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits a coprocessor internal operation.
//
// The operation is described by the coprocessor-info array in argument 0:
//   [0] coprocessor number  [1] "two" flag  [2] opc1
//   [3] CRd                 [4] CRn         [5] CRm   [6] opc2
// The selected coprocessor is asked to compile the operation. If there is no
// such coprocessor, or it returns no action, EmitCoprocessorException is used
// instead; otherwise a call to the returned callback is emitted.
void EmitX64::EmitCoprocInternalOperation(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto info = inst->GetArg(0).GetCoprocInfo();

    const size_t coproc_num = info[0];
    const bool two = info[1] != 0;
    const auto opc1 = static_cast<unsigned>(info[2]);
    const auto CRd = static_cast<Arm::CoprocReg>(info[3]);
    const auto CRn = static_cast<Arm::CoprocReg>(info[4]);
    const auto CRm = static_cast<Arm::CoprocReg>(info[5]);
    const auto opc2 = static_cast<unsigned>(info[6]);

    std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num];
    if (coproc) {
        if (auto action = coproc->CompileInternalOperation(two, opc1, CRd, CRn, CRm, opc2)) {
            CallCoprocCallback(code, reg_alloc, jit_interface, *action);
            return;
        }
    }

    // Either the coprocessor is absent or it rejected the operation.
    EmitCoprocessorException();
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits the transfer of one 32-bit word (args[1]) to a coprocessor.
//
// The transfer is described by the coprocessor-info array in argument 0:
//   [0] coprocessor number  [1] "two" flag  [2] opc1
//   [3] CRn                 [4] CRm         [5] opc2
// The selected coprocessor returns a variant whose active alternative decides
// what is emitted:
//   0 - nothing usable: EmitCoprocessorException
//   1 - a callback: call it with the word as argument
//   2 - a u32*: store the word to that host address
void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);
    auto info = inst->GetArg(0).GetCoprocInfo();

    const size_t coproc_num = info[0];
    const bool two = info[1] != 0;
    const auto opc1 = static_cast<unsigned>(info[2]);
    const auto CRn = static_cast<Arm::CoprocReg>(info[3]);
    const auto CRm = static_cast<Arm::CoprocReg>(info[4]);
    const auto opc2 = static_cast<unsigned>(info[5]);

    std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num];
    if (!coproc) {
        EmitCoprocessorException();
        return;
    }

    auto action = coproc->CompileSendOneWord(two, opc1, CRn, CRm, opc2);
    const auto alternative = action.which();

    if (alternative == 0) {
        EmitCoprocessorException();
        return;
    }

    if (alternative == 1) {
        CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, args[1]);
        return;
    }

    if (alternative == 2) {
        u32* destination_ptr = boost::get<u32*>(action);

        Xbyak::Reg32 word_reg = reg_alloc.UseGpr(args[1]).cvt32();
        Xbyak::Reg64 addr_reg = reg_alloc.ScratchGpr();

        code->mov(addr_reg, reinterpret_cast<u64>(destination_ptr));
        code->mov(code->dword[addr_reg], word_reg);
        return;
    }

    ASSERT_MSG(false, "Unreachable");
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for an IR instruction that sends two 32-bit words to a coprocessor.
//
// Argument 0 carries the packed coprocessor fields; arguments 1 and 2 are the
// two words. The variant returned by CompileSendTwoWords() decides what is
// emitted:
//   alternative 0 - a coprocessor exception,
//   alternative 1 - a call to a Coprocessor::Callback with both words as arguments,
//   alternative 2 - two direct 32-bit stores through a pair of host u32* pointers.
// A null entry in cb.coprocessors also results in a coprocessor exception.
void EmitX64::EmitCoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    // Unpack the instruction fields.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t coproc_num = info[0];
    const bool two = info[1] != 0;
    const auto opc = static_cast<unsigned>(info[2]);
    const auto CRm = static_cast<Arm::CoprocReg>(info[3]);

    std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num];
    if (!coproc) {
        EmitCoprocessorException();
        return;
    }

    auto action = coproc->CompileSendTwoWords(two, opc, CRm);
    const auto alternative = action.which();

    if (alternative == 0) {
        EmitCoprocessorException();
        return;
    }

    if (alternative == 1) {
        CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, args[1], args[2]);
        return;
    }

    if (alternative == 2) {
        auto targets = boost::get<std::array<u32*, 2>>(action);

        const Xbyak::Reg32 first_word = reg_alloc.UseGpr(args[1]).cvt32();
        const Xbyak::Reg32 second_word = reg_alloc.UseGpr(args[2]).cvt32();
        const Xbyak::Reg64 target_addr = reg_alloc.ScratchGpr();

        // One address register is reused for both stores.
        code->mov(target_addr, reinterpret_cast<u64>(targets[0]));
        code->mov(code->dword[target_addr], first_word);
        code->mov(target_addr, reinterpret_cast<u64>(targets[1]));
        code->mov(code->dword[target_addr], second_word);
        return;
    }

    ASSERT_MSG(false, "Unreachable");
}
|
2016-12-31 12:17:47 +01:00
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for an IR instruction that reads one 32-bit word from a coprocessor.
//
// Argument 0 carries the packed coprocessor fields. The variant returned by
// CompileGetOneWord() decides what is emitted:
//   alternative 0 - a coprocessor exception,
//   alternative 1 - a call to a Coprocessor::Callback, with `inst` passed on to
//                   CallCoprocCallback,
//   alternative 2 - a direct 32-bit load through a host u32*, whose result
//                   register is defined as the value of `inst`.
// A null entry in cb.coprocessors also results in a coprocessor exception.
void EmitX64::EmitCoprocGetOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    // Unpack the instruction fields.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t coproc_num = info[0];
    const bool two = info[1] != 0;
    const auto opc1 = static_cast<unsigned>(info[2]);
    const auto CRn = static_cast<Arm::CoprocReg>(info[3]);
    const auto CRm = static_cast<Arm::CoprocReg>(info[4]);
    const auto opc2 = static_cast<unsigned>(info[5]);

    std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num];
    if (!coproc) {
        EmitCoprocessorException();
        return;
    }

    auto action = coproc->CompileGetOneWord(two, opc1, CRn, CRm, opc2);
    const auto alternative = action.which();

    if (alternative == 0) {
        EmitCoprocessorException();
        return;
    }

    if (alternative == 1) {
        CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), inst);
        return;
    }

    if (alternative == 2) {
        u32* const source = boost::get<u32*>(action);

        const Xbyak::Reg32 word = reg_alloc.ScratchGpr().cvt32();
        const Xbyak::Reg64 source_addr = reg_alloc.ScratchGpr();

        // The pointer is a 64-bit immediate, so it is materialised in a register first.
        code->mov(source_addr, reinterpret_cast<u64>(source));
        code->mov(word, code->dword[source_addr]);

        reg_alloc.DefineValue(inst, word);
        return;
    }

    ASSERT_MSG(false, "Unreachable");
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for an IR instruction that reads two 32-bit words from a coprocessor.
//
// Argument 0 carries the packed coprocessor fields. The variant returned by
// CompileGetTwoWords() decides what is emitted:
//   alternative 0 - a coprocessor exception,
//   alternative 1 - a call to a Coprocessor::Callback, with `inst` passed on to
//                   CallCoprocCallback,
//   alternative 2 - two direct 32-bit loads through host u32* pointers, combined
//                   into one 64-bit value: pointer [1] supplies the upper 32
//                   bits and pointer [0] the lower 32 bits.
// A null entry in cb.coprocessors also results in a coprocessor exception.
void EmitX64::EmitCoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    // Unpack the instruction fields.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t coproc_num = info[0];
    const bool two = info[1] != 0;
    const auto opc = static_cast<unsigned>(info[2]);
    const auto CRm = static_cast<Arm::CoprocReg>(info[3]);

    std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num];
    if (!coproc) {
        EmitCoprocessorException();
        return;
    }

    auto action = coproc->CompileGetTwoWords(two, opc, CRm);
    const auto alternative = action.which();

    if (alternative == 0) {
        EmitCoprocessorException();
        return;
    }

    if (alternative == 1) {
        CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), inst);
        return;
    }

    if (alternative == 2) {
        auto sources = boost::get<std::array<u32*, 2>>(action);

        const Xbyak::Reg64 result = reg_alloc.ScratchGpr();
        const Xbyak::Reg64 source_addr = reg_alloc.ScratchGpr();
        const Xbyak::Reg64 low_word = reg_alloc.ScratchGpr();

        // Upper half: load word [1] and move it into bits 63:32.
        code->mov(source_addr, reinterpret_cast<u64>(sources[1]));
        code->mov(result.cvt32(), code->dword[source_addr]);
        code->shl(result, 32);
        // Lower half: load word [0] and merge it in.
        code->mov(source_addr, reinterpret_cast<u64>(sources[0]));
        code->mov(low_word.cvt32(), code->dword[source_addr]);
        code->or_(result, low_word);

        reg_alloc.DefineValue(inst, result);
        return;
    }

    ASSERT_MSG(false, "Unreachable");
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for an IR coprocessor load-words instruction.
//
// Argument 0 carries the packed coprocessor fields (including an optional
// `option` byte and its presence flag); argument 1 is forwarded to the
// callback. A coprocessor exception is emitted when the coprocessor slot is
// null or when CompileLoadWords() yields no callback.
void EmitX64::EmitCoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    // Unpack the instruction fields.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t coproc_num = info[0];
    const bool two = info[1] != 0;
    const bool long_transfer = info[2] != 0;
    const auto CRd = static_cast<Arm::CoprocReg>(info[3]);
    const bool has_option = info[4] != 0;
    // Engaged only when has_option is set.
    boost::optional<u8> option{has_option, info[5]};

    std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num];
    if (!coproc) {
        EmitCoprocessorException();
        return;
    }

    auto callback = coproc->CompileLoadWords(two, long_transfer, CRd, option);
    if (!callback) {
        EmitCoprocessorException();
        return;
    }

    CallCoprocCallback(code, reg_alloc, jit_interface, *callback, nullptr, args[1]);
}
|
|
|
|
|
2017-02-04 10:23:19 +01:00
|
|
|
// Emits host code for an IR coprocessor store-words instruction.
//
// Argument 0 carries the packed coprocessor fields (including an optional
// `option` byte and its presence flag); argument 1 is forwarded to the
// callback. A coprocessor exception is emitted when the coprocessor slot is
// null or when CompileStoreWords() yields no callback.
void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    // Unpack the instruction fields.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t coproc_num = info[0];
    const bool two = info[1] != 0;
    const bool long_transfer = info[2] != 0;
    const auto CRd = static_cast<Arm::CoprocReg>(info[3]);
    const bool has_option = info[4] != 0;
    // Engaged only when has_option is set.
    boost::optional<u8> option{has_option, info[5]};

    std::shared_ptr<Coprocessor> coproc = cb.coprocessors[coproc_num];
    if (!coproc) {
        EmitCoprocessorException();
        return;
    }

    auto callback = coproc->CompileStoreWords(two, long_transfer, CRd, option);
    if (!callback) {
        EmitCoprocessorException();
        return;
    }

    CallCoprocCallback(code, reg_alloc, jit_interface, *callback, nullptr, args[1]);
}
|
|
|
|
|
2016-07-04 15:37:50 +02:00
|
|
|
void EmitX64::EmitAddCycles(size_t cycles) {
|
2016-08-24 21:07:08 +02:00
|
|
|
using namespace Xbyak::util;
|
2016-07-04 15:37:50 +02:00
|
|
|
ASSERT(cycles < std::numeric_limits<u32>::max());
|
2016-08-24 21:07:08 +02:00
|
|
|
code->sub(qword[r15 + offsetof(JitState, cycles_remaining)], static_cast<u32>(cycles));
|
2016-07-07 11:53:09 +02:00
|
|
|
}
|
|
|
|
|
2016-08-24 21:07:08 +02:00
|
|
|
// Emits a test of the ARM condition `cond` against the CPSR stored in the JIT state.
//
// The emitted code jumps to the returned label when the condition holds and
// falls through otherwise. The label is returned unbound; callers bind it
// with code->L().
//
// The emitted code loads the CPSR into eax and may modify it; the GT and LE
// cases additionally overwrite ebx and esi. Conditions not listed in the
// switch trigger an assertion.
static Xbyak::Label EmitCond(BlockOfCode* code, Arm::Cond cond) {
    using namespace Xbyak::util;

    Xbyak::Label label;

    const Xbyak::Reg32 cpsr = eax;
    code->mov(cpsr, MJitStateCpsr());

    // Bit positions of the N, Z, C and V flags within the CPSR.
    constexpr size_t n_shift = 31;
    constexpr size_t z_shift = 30;
    constexpr size_t c_shift = 29;
    constexpr size_t v_shift = 28;
    constexpr u32 n_mask = 1u << n_shift;
    constexpr u32 z_mask = 1u << z_shift;
    constexpr u32 c_mask = 1u << c_shift;
    constexpr u32 v_mask = 1u << v_shift;

    // Pass when the flag selected by `mask` is set.
    const auto pass_if_set = [&](u32 mask) {
        code->test(cpsr, mask);
        code->jnz(label);
    };

    // Pass when the flag selected by `mask` is clear.
    const auto pass_if_clear = [&](u32 mask) {
        code->test(cpsr, mask);
        code->jz(label);
    };

    // Leaves the host zero flag describing bit 0 of (z | (n ^ v)):
    // ZF set means "!z & (n == v)", ZF clear means "z | (n != v)".
    const auto test_z_or_n_ne_v = [&] {
        const Xbyak::Reg32 tmp1 = ebx;
        const Xbyak::Reg32 tmp2 = esi;
        code->mov(tmp1, cpsr);
        code->mov(tmp2, cpsr);
        code->shr(tmp1, n_shift);
        code->shr(tmp2, v_shift);
        code->shr(cpsr, z_shift);
        code->xor_(tmp1, tmp2);
        code->or_(tmp1, cpsr);
        code->test(tmp1, 1);
    };

    switch (cond) {
    case Arm::Cond::EQ: // z
        pass_if_set(z_mask);
        break;
    case Arm::Cond::NE: // !z
        pass_if_clear(z_mask);
        break;
    case Arm::Cond::CS: // c
        pass_if_set(c_mask);
        break;
    case Arm::Cond::CC: // !c
        pass_if_clear(c_mask);
        break;
    case Arm::Cond::MI: // n
        pass_if_set(n_mask);
        break;
    case Arm::Cond::PL: // !n
        pass_if_clear(n_mask);
        break;
    case Arm::Cond::VS: // v
        pass_if_set(v_mask);
        break;
    case Arm::Cond::VC: // !v
        pass_if_clear(v_mask);
        break;
    case Arm::Cond::HI: // c & !z
        // Of the {z, c} pair, exactly c must be set.
        code->and_(cpsr, z_mask | c_mask);
        code->cmp(cpsr, c_mask);
        code->je(label);
        break;
    case Arm::Cond::LS: // !c | z
        code->and_(cpsr, z_mask | c_mask);
        code->cmp(cpsr, c_mask);
        code->jne(label);
        break;
    case Arm::Cond::GE: // n == v
        // Equal when both flags are clear or both are set.
        code->and_(cpsr, n_mask | v_mask);
        code->jz(label);
        code->cmp(cpsr, n_mask | v_mask);
        code->je(label);
        break;
    case Arm::Cond::LT: { // n != v
        // Both clear -> fail; both set -> fall through (fail); otherwise pass.
        Xbyak::Label fail;
        code->and_(cpsr, n_mask | v_mask);
        code->jz(fail);
        code->cmp(cpsr, n_mask | v_mask);
        code->jne(label);
        code->L(fail);
        break;
    }
    case Arm::Cond::GT: // !z & (n == v)
        test_z_or_n_ne_v();
        code->jz(label);
        break;
    case Arm::Cond::LE: // z | (n != v)
        test_z_or_n_ne_v();
        code->jnz(label);
        break;
    default:
        ASSERT_MSG(false, "Unknown cond %zu", static_cast<size_t>(cond));
        break;
    }

    return label;
}
|
|
|
|
|
2016-08-18 19:16:18 +02:00
|
|
|
// Emits the condition check that guards a conditionally executed block.
//
// Blocks whose condition is AL need no check and must not have a
// condition-failed location. For every other condition the emitted code tests
// the condition; on failure it accounts for the block's condition-failed cycle
// count and links to the condition-failed location, and on success it
// continues at the code emitted after this prelude.
void EmitX64::EmitCondPrelude(const IR::Block& block) {
    const Arm::Cond cond = block.GetCondition();

    if (cond == Arm::Cond::AL) {
        ASSERT(!block.HasConditionFailedLocation());
        return;
    }

    ASSERT(block.HasConditionFailedLocation());

    Xbyak::Label condition_passed = EmitCond(code, cond);

    // Fall-through path: the condition failed.
    EmitAddCycles(block.ConditionFailedCycleCount());
    EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());

    code->L(condition_passed);
}
|
|
|
|
|
2016-09-05 12:54:09 +02:00
|
|
|
// Dispatches a terminal variant to the EmitTerminal overload for the
// alternative it currently holds.
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
    const auto emit_alternative = [this, &initial_location](auto concrete_terminal) {
        this->EmitTerminal(concrete_terminal, initial_location);
    };
    Common::VisitVariant<void>(terminal, emit_alternative);
}
|
|
|
|
|
2017-02-16 20:40:51 +01:00
|
|
|
// Emits the Interpret terminal: hands execution at terminal.next to the
// user-supplied interpreter fallback and then returns from the run loop.
//
// Changing the T or E flag across this terminal is not implemented and asserts.
void EmitX64::EmitTerminal(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
    const auto& next = terminal.next;
    ASSERT_MSG(next.TFlag() == initial_location.TFlag(), "Unimplemented");
    ASSERT_MSG(next.EFlag() == initial_location.EFlag(), "Unimplemented");

    // Arguments for cb.InterpreterFallback: (pc, jit_interface, user_arg).
    code->mov(code->ABI_PARAM1.cvt32(), next.PC());
    code->mov(code->ABI_PARAM2, reinterpret_cast<u64>(jit_interface));
    code->mov(code->ABI_PARAM3, reinterpret_cast<u64>(cb.user_arg));
    // Also store the PC in the JIT state before leaving JIT-generated code.
    code->mov(MJitStateReg(Arm::Reg::PC), code->ABI_PARAM1.cvt32());
    code->SwitchMxcsrOnExit();
    code->CallFunction(cb.InterpreterFallback);
    code->ReturnFromRunCode(false); // TODO: Check cycles
}
|
|
|
|
|
2017-02-16 20:40:51 +01:00
|
|
|
// Emits the ReturnToDispatch terminal: simply returns from the run loop.
// Neither the terminal's contents nor the initial location are used.
void EmitX64::EmitTerminal(IR::Term::ReturnToDispatch /*terminal*/, IR::LocationDescriptor /*initial_location*/) {
    code->ReturnFromRunCode();
}
|
|
|
|
|
2017-02-16 20:40:51 +01:00
|
|
|
// Emits the LinkBlock terminal: a link to terminal.next that is only taken
// while cycles remain.
//
// Emitted sequence:
//   1. If the T or E flag differs between the current and next location,
//      update CPSR bit 5 (T) or bit 9 (E) to the next location's value.
//   2. Compare cycles_remaining with zero and emit a patchable `jg`, recorded
//      in patch_information so it can be re-pointed later. It targets the next
//      block's entrypoint when that block is already compiled.
//   3. Otherwise (jg not taken) store the next PC and force a return from the
//      run loop.
void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    constexpr u32 cpsr_t_bit = 1u << 5;
    constexpr u32 cpsr_e_bit = 1u << 9;

    const auto& next = terminal.next;

    const bool next_t = next.TFlag();
    if (next_t != initial_location.TFlag()) {
        if (next_t) {
            code->or_(MJitStateCpsr(), cpsr_t_bit);
        } else {
            code->and_(MJitStateCpsr(), ~cpsr_t_bit);
        }
    }

    const bool next_e = next.EFlag();
    if (next_e != initial_location.EFlag()) {
        if (next_e) {
            code->or_(MJitStateCpsr(), cpsr_e_bit);
        } else {
            code->and_(MJitStateCpsr(), ~cpsr_e_bit);
        }
    }

    code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0);

    // Record where the patchable jump starts before emitting it.
    patch_information[next.UniqueHash()].jg.emplace_back(code->getCurr());
    if (auto target_block = GetBasicBlock(next)) {
        EmitPatchJg(next, target_block->entrypoint);
    } else {
        EmitPatchJg(next);
    }

    code->mov(MJitStateReg(Arm::Reg::PC), next.PC());
    code->ForceReturnFromRunCode(); // TODO: Check cycles, Properly do a link
}
|
|
|
|
|
2017-02-16 20:40:51 +01:00
|
|
|
// Emits the LinkBlockFast terminal: an unconditional link to terminal.next.
//
// If the T or E flag differs between the current and next location, CPSR bit 5
// (T) or bit 9 (E) is first updated to the next location's value. A patchable
// `jmp` is then emitted and recorded in patch_information; it targets the next
// block's entrypoint when that block is already compiled.
void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    constexpr u32 cpsr_t_bit = 1u << 5;
    constexpr u32 cpsr_e_bit = 1u << 9;

    const auto& next = terminal.next;

    const bool next_t = next.TFlag();
    if (next_t != initial_location.TFlag()) {
        if (next_t) {
            code->or_(MJitStateCpsr(), cpsr_t_bit);
        } else {
            code->and_(MJitStateCpsr(), ~cpsr_t_bit);
        }
    }

    const bool next_e = next.EFlag();
    if (next_e != initial_location.EFlag()) {
        if (next_e) {
            code->or_(MJitStateCpsr(), cpsr_e_bit);
        } else {
            code->and_(MJitStateCpsr(), ~cpsr_e_bit);
        }
    }

    // Record where the patchable jump starts before emitting it.
    patch_information[next.UniqueHash()].jmp.emplace_back(code->getCurr());
    if (auto target_block = GetBasicBlock(next)) {
        EmitPatchJmp(next, target_block->entrypoint);
    } else {
        EmitPatchJmp(next);
    }
}
|
|
|
|
|
2017-02-16 20:40:51 +01:00
|
|
|
// Emits the PopRSBHint terminal: looks the current location up in the return
// stack buffer stored in the JIT state and jumps to the matching code pointer.
//
// A 64-bit key is built in rbx from the current state: the upper 32 bits hold
// ((CPSR & (bit 5 | bit 9)) >> 2) | FPSCR_mode and the lower 32 bits hold the
// PC. The key is compared against every rsb_location_descriptors entry; on a
// match the corresponding rsb_codeptrs entry replaces the jump target, which
// otherwise defaults to the return-from-run-code address.
void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) {
    using namespace Xbyak::util;

    constexpr u32 cpsr_key_bits = u32((1 << 5) | (1 << 9));

    // This calculation has to match up with IREmitter::PushRSB
    code->mov(ebx, MJitStateCpsr());
    code->mov(ecx, MJitStateReg(Arm::Reg::PC));
    code->and_(ebx, cpsr_key_bits);
    code->shr(ebx, 2);
    code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
    code->shl(rbx, 32);
    code->or_(rbx, rcx);

    // Default target, used when no entry matches.
    code->mov(rax, reinterpret_cast<u64>(code->GetReturnFromRunCodeAddress()));

    // Branch-free scan: each matching entry overwrites the target via cmove.
    for (size_t index = 0; index < JitState::RSBSize; ++index) {
        const size_t entry_offset = index * sizeof(u64);
        code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + entry_offset]);
        code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + entry_offset]);
    }

    code->jmp(rax);
}
|
|
|
|
|
2017-02-16 20:40:51 +01:00
|
|
|
// Emits the If terminal: tests terminal.if_ and emits both successor terminals.
//
// The else_ terminal is laid out first as the fall-through path; the then_
// terminal follows at the label the condition test jumps to on success.
void EmitX64::EmitTerminal(IR::Term::If terminal, IR::LocationDescriptor initial_location) {
    Xbyak::Label condition_passed = EmitCond(code, terminal.if_);

    // Condition failed.
    EmitTerminal(terminal.else_, initial_location);

    // Condition passed.
    code->L(condition_passed);
    EmitTerminal(terminal.then_, initial_location);
}
|
|
|
|
|
2017-02-16 20:40:51 +01:00
|
|
|
// Emits the CheckHalt terminal: if JitState::halt_requested is non-zero, jump
// to the force-return-from-run-code address; otherwise continue with
// terminal.else_.
void EmitX64::EmitTerminal(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    const auto halt_flag = code->byte[r15 + offsetof(JitState, halt_requested)];
    code->cmp(halt_flag, u8(0));
    code->jne(code->GetForceReturnFromRunCodeAddress());

    EmitTerminal(terminal.else_, initial_location);
}
|
|
|
|
|
2016-12-19 16:01:49 +01:00
|
|
|
// Rewrites every recorded patch site that refers to `desc` so that it targets `bb`.
//
// For each location stored in patch_information for this descriptor (jg, jmp
// and mov_rcx sites), the emitter's write position is moved to that location
// and the corresponding patch sequence is re-emitted. The original write
// position is restored afterwards. A null `bb` is passed through to the
// EmitPatch* helpers unchanged.
void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
    const CodePtr resume_at = code->getCurr();
    const PatchInformation& sites = patch_information[desc.UniqueHash()];

    for (CodePtr site : sites.jg) {
        code->SetCodePtr(site);
        EmitPatchJg(desc, bb);
    }

    for (CodePtr site : sites.jmp) {
        code->SetCodePtr(site);
        EmitPatchJmp(desc, bb);
    }

    for (CodePtr site : sites.mov_rcx) {
        code->SetCodePtr(site);
        EmitPatchMovRcx(bb);
    }

    code->SetCodePtr(resume_at);
}
|
|
|
|
|
2016-12-19 16:01:49 +01:00
|
|
|
// Re-emits every patch site that refers to `desc` with no target block, by
// patching with a null code pointer.
void EmitX64::Unpatch(const IR::LocationDescriptor& desc) {
    const CodePtr no_target = nullptr;
    Patch(desc, no_target);
}
|
|
|
|
|
2017-04-08 11:04:53 +02:00
|
|
|
// Emits a patchable `jg` at the current write position.
//
// With a target pointer, the jump goes straight to that code. Without one, the
// target location's PC is stored to the JIT state and the jump goes to the
// return-from-run-code address instead. The site is then sized to 14 bytes via
// EnsurePatchLocationSize, regardless of which form was emitted.
void EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
    const CodePtr site_start = code->getCurr();

    if (!target_code_ptr) {
        code->mov(MJitStateReg(Arm::Reg::PC), target_desc.PC());
        code->jg(code->GetReturnFromRunCodeAddress());
    } else {
        code->jg(target_code_ptr);
    }

    code->EnsurePatchLocationSize(site_start, 14);
}
|
|
|
|
|
|
|
|
// Emits a patchable `jmp` at the current write position.
//
// With a target pointer, the jump goes straight to that code. Without one, the
// target location's PC is stored to the JIT state and the jump goes to the
// return-from-run-code address instead. The site is then sized to 13 bytes via
// EnsurePatchLocationSize, regardless of which form was emitted.
void EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
    const CodePtr site_start = code->getCurr();

    if (!target_code_ptr) {
        code->mov(MJitStateReg(Arm::Reg::PC), target_desc.PC());
        code->jmp(code->GetReturnFromRunCodeAddress());
    } else {
        code->jmp(target_code_ptr);
    }

    code->EnsurePatchLocationSize(site_start, 13);
}
|
|
|
|
|
|
|
|
// Emits a patchable `mov rcx, imm64` that loads a code pointer at the current
// write position.
//
// A null target is replaced by the return-from-run-code address. The site is
// sized to 10 bytes via EnsurePatchLocationSize.
void EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
    if (target_code_ptr == nullptr) {
        target_code_ptr = code->GetReturnFromRunCodeAddress();
    }

    const CodePtr site_start = code->getCurr();
    code->mov(code->rcx, reinterpret_cast<u64>(target_code_ptr));
    code->EnsurePatchLocationSize(site_start, 10);
}
|
|
|
|
|
2016-07-07 13:01:47 +02:00
|
|
|
void EmitX64::ClearCache() {
|
2016-12-19 16:01:49 +01:00
|
|
|
block_descriptors.clear();
|
|
|
|
patch_information.clear();
|
2016-07-07 13:01:47 +02:00
|
|
|
}
|
|
|
|
|
2017-02-16 19:18:29 +01:00
|
|
|
// Removes cached block descriptors overlapping `range` and unpatches the jump
// sites that referred to them.
//
// A full address range (alternative 0) clears the whole cache. For an address
// interval (alternative 1), every block descriptor whose [start PC, end PC]
// overlaps the interval is erased; if patch information exists for that
// block's location, its patch sites are re-emitted without a target.
void EmitX64::InvalidateCacheRange(const Common::AddressRange& range) {
    switch (range.which()) {
    case 0: // FullAddressRange
        ClearCache();
        break;

    case 1: { // AddressInterval
        auto interval = boost::get<Common::AddressInterval>(range);
        for (auto it = std::begin(block_descriptors); it != std::end(block_descriptors);) {
            // Copy the descriptor by value: it is still needed after erase()
            // has destroyed the map element it lives in, so a reference into
            // the element would dangle.
            const IR::LocationDescriptor descriptor = it->second.start_location;
            const u32 start = descriptor.PC();
            const u32 end = it->second.end_location_pc;

            if (!interval.Overlaps(start, end)) {
                ++it;
                continue;
            }

            it = block_descriptors.erase(it);

            // Only unpatch when patch sites were recorded, so that no empty
            // patch_information entry is created for this location.
            if (patch_information.find(descriptor.UniqueHash()) != patch_information.end()) {
                Unpatch(descriptor);
            }
        }
        break;
    }
    }
}
|
|
|
|
|
2016-07-01 15:01:06 +02:00
|
|
|
} // namespace BackendX64
|
|
|
|
} // namespace Dynarmic
|