A64: Implement CRC32C

This commit is contained in:
Lioncash 2018-01-25 12:51:45 -05:00 committed by MerryMage
parent d7044bc751
commit 7ffbebf290
7 changed files with 204 additions and 1 deletions

View file

@ -63,6 +63,7 @@ add_library(dynarmic
frontend/A64/translate/impl/data_processing_addsub.cpp
frontend/A64/translate/impl/data_processing_bitfield.cpp
frontend/A64/translate/impl/data_processing_conditional_select.cpp
frontend/A64/translate/impl/data_processing_crc32.cpp
frontend/A64/translate/impl/data_processing_logical.cpp
frontend/A64/translate/impl/data_processing_multiply.cpp
frontend/A64/translate/impl/data_processing_pcrel.cpp
@ -133,6 +134,7 @@ if (ARCHITECTURE_x86_64)
backend_x64/constant_pool.h
backend_x64/emit_x64.cpp
backend_x64/emit_x64.h
backend_x64/emit_x64_crc32.cpp
backend_x64/emit_x64_data_processing.cpp
backend_x64/emit_x64_floating_point.cpp
backend_x64/emit_x64_packed.cpp

View file

@ -0,0 +1,130 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include <climits>
#include "backend_x64/block_of_code.h"
#include "backend_x64/emit_x64.h"
#include "common/common_types.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendX64 {
using namespace Xbyak::util;
// Byte-indexed lookup table for CRC-32C (Castagnoli, polynomial 0x1EDC6F41).
// The entries are for the reflected (least-significant-bit-first) form of the
// polynomial: entry 0x80 is 0x82F63B78, which is 0x1EDC6F41 with its bits reversed.
// ComputeCRC32 consumes it one input byte at a time as
//     crc = (crc >> 8) ^ table[(crc ^ byte) & 0xFF]
constexpr std::array<u32, 256> castagnoli_table{{
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
}};
/// Table-driven CRC-32 update over the low-order bytes of `value`.
///
/// @param table   256-entry lookup table for the reflected form of the polynomial.
/// @param crc     Running CRC value to be updated (no pre/post inversion is applied here).
/// @param value   Data to fold into the CRC; bytes are consumed least-significant first.
/// @param length  Number of bytes of `value` to consume. Values outside
///                [0, sizeof(value)] are clamped, so a negative length is a no-op
///                and an over-long length never reads beyond `value`.
/// @return The updated CRC value.
static u32 ComputeCRC32(const std::array<u32, 256>& table, u32 crc, const u64 value, int length) {
    // Clamp the byte count: the previous `length-- != 0` loop ran without bound for
    // negative lengths and walked past the 8 bytes of `value` for lengths above 8.
    const int max_bytes = static_cast<int>(sizeof(value));
    const int byte_count = length < 0 ? 0 : (length > max_bytes ? max_bytes : length);

    for (int i = 0; i < byte_count; ++i) {
        // Extract bytes by shifting rather than by aliasing the object representation,
        // so the low-order byte is always processed first regardless of host byte order.
        const u32 byte = static_cast<u32>((value >> (i * CHAR_BIT)) & 0xFF);
        crc = (crc >> 8) ^ table[(crc ^ byte) & 0xFF];
    }

    return crc;
}
// Emits host code for a CRC32C IR instruction whose data operand is `data_size` bits wide.
// When the host CPU reports SSE4.2, a single crc32 instruction is emitted; otherwise a
// call to the table-driven ComputeCRC32 routine is emitted instead.
static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    if (!code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
        // Software path: the first and fourth parameter slots are left empty in HostCall
        // and then loaded by hand with the table address and the data length in bytes,
        // matching ComputeCRC32(table, crc, value, length).
        ctx.reg_alloc.HostCall(inst, {}, operands[0], operands[1], {});
        code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&castagnoli_table));
        code.mov(code.ABI_PARAM4, data_size / CHAR_BIT);
        code.CallFunction(&ComputeCRC32);
        return;
    }

    // Hardware path: the accumulator register is updated in place and becomes the result.
    const Xbyak::Reg32 accumulator = ctx.reg_alloc.UseScratchGpr(operands[0]).cvt32();
    const Xbyak::Reg input = ctx.reg_alloc.UseGpr(operands[1]).changeBit(data_size);
    code.crc32(accumulator, input);
    ctx.reg_alloc.DefineValue(inst, accumulator);
}
// Emits host code for the CRC32Castagnoli8 IR opcode by forwarding to the shared
// EmitCRC32Castagnoli helper with a data width of 8 bits.
void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32Castagnoli(*code, ctx, inst, 8);
}
// Emits host code for the CRC32Castagnoli16 IR opcode by forwarding to the shared
// EmitCRC32Castagnoli helper with a data width of 16 bits.
void EmitX64::EmitCRC32Castagnoli16(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32Castagnoli(*code, ctx, inst, 16);
}
// Emits host code for the CRC32Castagnoli32 IR opcode by forwarding to the shared
// EmitCRC32Castagnoli helper with a data width of 32 bits.
void EmitX64::EmitCRC32Castagnoli32(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32Castagnoli(*code, ctx, inst, 32);
}
// Emits host code for the CRC32Castagnoli64 IR opcode by forwarding to the shared
// EmitCRC32Castagnoli helper with a data width of 64 bits.
void EmitX64::EmitCRC32Castagnoli64(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32Castagnoli(*code, ctx, inst, 64);
}
} // namespace Dynarmic::BackendX64

View file

@ -275,7 +275,7 @@ INST(LSRV, "LSRV", "z0011
INST(ASRV, "ASRV", "z0011010110mmmmm001010nnnnnddddd")
INST(RORV, "RORV", "z0011010110mmmmm001011nnnnnddddd")
//INST(CRC32, "CRC32B, CRC32H, CRC32W, CRC32X", "z0011010110mmmmm0100zznnnnnddddd")
//INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd")
INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd")
//INST(PACGA, "PACGA", "10011010110mmmmm001100nnnnnddddd")
// Data Processing - Register - 1 source

View file

@ -0,0 +1,44 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "frontend/A64/translate/impl/impl.h"
namespace Dynarmic::A64 {
bool TranslatorVisitor::CRC32C(bool sf, Reg Rm, Imm<2> sz, Reg Rn, Reg Rd) {
const u32 integral_size = sz.ZeroExtend();
if (sf && integral_size != 0b11) {
return UnallocatedEncoding();
}
if (!sf && integral_size == 0b11) {
return UnallocatedEncoding();
}
const IR::U32 result = [&] {
const size_t datasize = sf ? 64 : 32;
const IR::U32 accumulator = ir.GetW(Rn);
const IR::U32U64 data = X(datasize, Rm);
switch (integral_size) {
case 0b00:
return ir.CRC32Castagnoli8(accumulator, data);
case 0b01:
return ir.CRC32Castagnoli16(accumulator, data);
case 0b10:
return ir.CRC32Castagnoli32(accumulator, data);
case 0b11:
default:
return ir.CRC32Castagnoli64(accumulator, data);
}
}();
X(32, Rd, result);
return true;
}
} // namespace Dynarmic::A64

View file

@ -683,6 +683,22 @@ U32 IREmitter::PackedSelect(const U32& ge, const U32& a, const U32& b) {
return Inst<U32>(Opcode::PackedSelect, ge, a, b);
}
// Emits a CRC32Castagnoli8 IR instruction with operands `a` and `b` and returns its U32 result.
// The A64 CRC32C translator passes the running CRC as `a` and the data as `b`.
U32 IREmitter::CRC32Castagnoli8(const U32& a, const U32& b) {
return Inst<U32>(Opcode::CRC32Castagnoli8, a, b);
}
// Emits a CRC32Castagnoli16 IR instruction with operands `a` and `b` and returns its U32 result.
// The A64 CRC32C translator passes the running CRC as `a` and the data as `b`.
U32 IREmitter::CRC32Castagnoli16(const U32& a, const U32& b) {
return Inst<U32>(Opcode::CRC32Castagnoli16, a, b);
}
// Emits a CRC32Castagnoli32 IR instruction with operands `a` and `b` and returns its U32 result.
// The A64 CRC32C translator passes the running CRC as `a` and the data as `b`.
U32 IREmitter::CRC32Castagnoli32(const U32& a, const U32& b) {
return Inst<U32>(Opcode::CRC32Castagnoli32, a, b);
}
// Emits a CRC32Castagnoli64 IR instruction with operands `a` and `b` and returns its U32 result.
// Unlike the narrower variants, the second operand `b` is a U64.
U32 IREmitter::CRC32Castagnoli64(const U32& a, const U64& b) {
return Inst<U32>(Opcode::CRC32Castagnoli64, a, b);
}
UAny IREmitter::VectorGetElement(size_t esize, const U128& a, size_t index) {
ASSERT_MSG(esize * index < 128, "Invalid index");
switch (esize) {

View file

@ -186,6 +186,11 @@ public:
U32 PackedAbsDiffSumS8(const U32& a, const U32& b);
U32 PackedSelect(const U32& ge, const U32& a, const U32& b);
// CRC-32C (Castagnoli) update operations. Each emits the IR opcode of the same name
// and yields a U32. The A64 translator passes the running CRC as `a` and the data as `b`;
// only the 64-bit variant takes a U64 for `b`.
U32 CRC32Castagnoli8(const U32& a, const U32& b);
U32 CRC32Castagnoli16(const U32& a, const U32& b);
U32 CRC32Castagnoli32(const U32& a, const U32& b);
U32 CRC32Castagnoli64(const U32& a, const U64& b);
UAny VectorGetElement(size_t esize, const U128& a, size_t index);
U128 VectorAdd8(const U128& a, const U128& b);
U128 VectorAdd16(const U128& a, const U128& b);

View file

@ -167,6 +167,12 @@ OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32
OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
// CRC instructions
// CRC-32C (Castagnoli) update opcodes; the x64 backend emits these with a data
// width in bits equal to the numeric suffix.
// NOTE(review): columns appear to be (name, result type, argument types...) - confirm
// against the OPCODE macro definition.
OPCODE(CRC32Castagnoli8, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli16, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli32, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli64, T::U32, T::U32, T::U64 )
// Vector instructions
OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )