Squashed 'externals/xbyak/' changes from 2794cde7..671fc805
671fc805 update test/cybozu 8ca86231 remove mutable in Address 8b93498f add cmpsb/scasb/... 7eb62750 avoid core_sharing_data_cache = 0 for some cloud envrionment 85767e95 support mingw64 59573e6e add PROTECT_RE mode for protect() 71b75f65 fix push(qword[mem]) 811f4959 Merge branch 'rsdubtso-master' 8e3cb711 Account for potentially zero 0xb leaf when parsing cache/topology via cpuid a816249f update version fe083912 fix to avoid zero division for some virtual machine f0a8f7fa update version cac09b7a Merge pull request #62 from mgouicem/master 1f96b5e0 Fixes an error raised by clang < 3.9 c0f885ac Merge pull request #61 from mgouicem/master bfe2d201 Change default value for n_cores in setCacheHierarchy. fd587b55 change format and add getter for data_cache_size 80b3c7b9 remove macro 88189609 Merge branch 'mgouicem-master' e6b79723 Adding queries to get the cpu topology on Intel architectures. 221384f0 vmov* supports [mem]|k|z c04141ef define XBYAK_NO_OP_NAMES for test af7f05ee add const for Label git-subtree-dir: externals/xbyak git-subtree-split: 671fc805d09d075f48d4625f183ef2e1ef725106
This commit is contained in:
parent
9fb82036ca
commit
dbb1f8cf37
25 changed files with 425 additions and 204 deletions
|
@ -36,6 +36,7 @@
|
||||||
T_B32 = 1 << 26, // m32bcst
|
T_B32 = 1 << 26, // m32bcst
|
||||||
T_B64 = 1 << 27, // m64bcst
|
T_B64 = 1 << 27, // m64bcst
|
||||||
T_M_K = 1 << 28, // mem{k}
|
T_M_K = 1 << 28, // mem{k}
|
||||||
|
T_VSIB = 1 << 29,
|
||||||
T_XXX
|
T_XXX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -156,5 +157,9 @@ std::string type2String(int type)
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_M_K";
|
str += "T_M_K";
|
||||||
}
|
}
|
||||||
|
if (type & T_VSIB) {
|
||||||
|
if (!str.empty()) str += " | ";
|
||||||
|
str += "T_VSIB";
|
||||||
|
}
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
|
@ -202,12 +202,12 @@ void putM_X()
|
||||||
const char *name;
|
const char *name;
|
||||||
int type;
|
int type;
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
{ 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
{ 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -533,7 +533,7 @@ void putGather()
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
std::string type = type2String(p.type);
|
std::string type = type2String(p.type | T_VSIB);
|
||||||
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -557,7 +557,7 @@ void putScatter()
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
std::string type = type2String(p.type);
|
std::string type = type2String(p.type | T_VSIB);
|
||||||
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -669,26 +669,41 @@ void putMisc()
|
||||||
{
|
{
|
||||||
puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }");
|
puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }");
|
||||||
puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }");
|
puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }");
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
int zm;
|
||||||
|
int type;
|
||||||
|
uint8 code;
|
||||||
|
bool isZmm;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true },
|
||||||
|
{ "vgatherpf0qps", 1, T_EW0 | T_N4, 0xC7, true },
|
||||||
|
{ "vgatherpf0dpd", 1, T_EW1 | T_N8, 0xC6, false },
|
||||||
|
{ "vgatherpf0qpd", 1, T_EW1 | T_N8, 0xC7, true },
|
||||||
|
|
||||||
puts("void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }");
|
{ "vgatherpf1dps", 2, T_EW0 | T_N4, 0xC6, true },
|
||||||
puts("void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }");
|
{ "vgatherpf1qps", 2, T_EW0 | T_N4, 0xC7, true },
|
||||||
puts("void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }");
|
{ "vgatherpf1dpd", 2, T_EW1 | T_N8, 0xC6, false },
|
||||||
puts("void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }");
|
{ "vgatherpf1qpd", 2, T_EW1 | T_N8, 0xC7, true },
|
||||||
|
|
||||||
puts("void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }");
|
{ "vscatterpf0dps", 5, T_EW0 | T_N4, 0xC6, true },
|
||||||
puts("void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }");
|
{ "vscatterpf0qps", 5, T_EW0 | T_N4, 0xC7, true },
|
||||||
puts("void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }");
|
{ "vscatterpf0dpd", 5, T_EW1 | T_N8, 0xC6, false },
|
||||||
puts("void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }");
|
{ "vscatterpf0qpd", 5, T_EW1 | T_N8, 0xC7, true },
|
||||||
|
|
||||||
puts("void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }");
|
{ "vscatterpf1dps", 6, T_EW0 | T_N4, 0xC6, true },
|
||||||
puts("void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }");
|
{ "vscatterpf1qps", 6, T_EW0 | T_N4, 0xC7, true },
|
||||||
puts("void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }");
|
{ "vscatterpf1dpd", 6, T_EW1 | T_N8, 0xC6, false },
|
||||||
puts("void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }");
|
{ "vscatterpf1qpd", 6, T_EW1 | T_N8, 0xC7, true },
|
||||||
|
};
|
||||||
puts("void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }");
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
puts("void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }");
|
const Tbl& p = tbl[i];
|
||||||
puts("void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }");
|
std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
|
||||||
puts("void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }");
|
printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n"
|
||||||
|
, p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
|
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
|
||||||
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
||||||
|
|
|
@ -630,9 +630,18 @@ void put()
|
||||||
{ "cpuid", 0x0F, 0xA2 },
|
{ "cpuid", 0x0F, 0xA2 },
|
||||||
{ "cwd", 0x66, 0x99 },
|
{ "cwd", 0x66, 0x99 },
|
||||||
{ "cwde", 0x98 },
|
{ "cwde", 0x98 },
|
||||||
|
{ "cmpsb", 0xA6 },
|
||||||
|
{ "cmpsw", 0x66, 0xA7 },
|
||||||
|
{ "cmpsd", 0xA7 },
|
||||||
|
{ "scasb", 0xAE },
|
||||||
|
{ "scasw", 0x66, 0xAF },
|
||||||
|
{ "scasd", 0xAF },
|
||||||
{ "movsb", 0xA4 },
|
{ "movsb", 0xA4 },
|
||||||
{ "movsw", 0x66, 0xA5 },
|
{ "movsw", 0x66, 0xA5 },
|
||||||
{ "movsd", 0xA5 },
|
{ "movsd", 0xA5 },
|
||||||
|
{ "stosb", 0xAA },
|
||||||
|
{ "stosw", 0x66, 0xAB },
|
||||||
|
{ "stosd", 0xAB },
|
||||||
{ "rep", 0xF3 },
|
{ "rep", 0xF3 },
|
||||||
|
|
||||||
{ "lahf", 0x9F },
|
{ "lahf", 0x9F },
|
||||||
|
@ -1233,12 +1242,12 @@ void put()
|
||||||
const char *name;
|
const char *name;
|
||||||
int type;
|
int type;
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
{ 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 },
|
{ 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
|
||||||
{ 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 },
|
{ 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
|
||||||
{ 0x7F, "movdqa", T_0F | T_66 | T_YMM },
|
{ 0x7F, "movdqa", T_0F | T_66 | T_YMM },
|
||||||
{ 0x7F, "movdqu", T_0F | T_F3 | T_YMM },
|
{ 0x7F, "movdqu", T_0F | T_F3 | T_YMM },
|
||||||
{ 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 },
|
{ 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
|
||||||
{ 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 },
|
{ 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -1643,7 +1652,7 @@ void put()
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode);
|
printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1678,7 +1687,10 @@ void put64()
|
||||||
const GenericTbl tbl[] = {
|
const GenericTbl tbl[] = {
|
||||||
{ "cdqe", 0x48, 0x98 },
|
{ "cdqe", 0x48, 0x98 },
|
||||||
{ "cqo", 0x48, 0x99 },
|
{ "cqo", 0x48, 0x99 },
|
||||||
|
{ "cmpsq", 0x48, 0xA7 },
|
||||||
{ "movsq", 0x48, 0xA5 },
|
{ "movsq", 0x48, 0xA5 },
|
||||||
|
{ "scasq", 0x48, 0xAF },
|
||||||
|
{ "stosq", 0x48, 0xAB },
|
||||||
};
|
};
|
||||||
putGeneric(tbl, NUM_OF_ARRAY(tbl));
|
putGeneric(tbl, NUM_OF_ARRAY(tbl));
|
||||||
|
|
||||||
|
|
14
readme.md
14
readme.md
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
Xbyak 5.601 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
Xbyak 5.67 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||||
=============
|
=============
|
||||||
|
|
||||||
Abstract
|
Abstract
|
||||||
|
@ -327,12 +327,16 @@ License
|
||||||
modified new BSD License
|
modified new BSD License
|
||||||
http://opensource.org/licenses/BSD-3-Clause
|
http://opensource.org/licenses/BSD-3-Clause
|
||||||
|
|
||||||
The files under test/cybozu/ are copied from cybozulib(https://github.com/herumi/cybozulib/),
|
|
||||||
which is licensed by BSD-3-Clause and are used for only tests.
|
|
||||||
The header files under xbyak/ are independent of cybozulib.
|
|
||||||
|
|
||||||
History
|
History
|
||||||
-------------
|
-------------
|
||||||
|
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
||||||
|
* 2018/Jul/26 ver 5.661 support mingw64
|
||||||
|
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
||||||
|
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
|
||||||
|
* 2018/Mar/07 ver 5.64 fix division by zero in Cpu() on some CPUs
|
||||||
|
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
|
||||||
|
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||||
|
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
||||||
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
||||||
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
||||||
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
||||||
|
|
19
readme.txt
19
readme.txt
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.601
|
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.67
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎概要
|
◎概要
|
||||||
|
@ -335,14 +335,17 @@ http://opensource.org/licenses/BSD-3-Clause
|
||||||
sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||||
いただきました。
|
いただきました。
|
||||||
|
|
||||||
test/cybozu/以下のファイルはcybozulib(https://github.com/herumi/cybozulib/)
|
|
||||||
の一部を使っています。cybozulibはBSD-3-Clauseライセンスです。
|
|
||||||
cybozulibは単体テストでのみ利用されていて、xbyak/ディレクトリ以下のヘッダ
|
|
||||||
ファイルはcybozulibとは独立に利用できます。
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎履歴
|
◎履歴
|
||||||
|
|
||||||
|
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
|
||||||
|
2018/07/26 ver 5.661 mingw64対応
|
||||||
|
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加
|
||||||
|
2018/06/26 ver 5.65 fix push(qword [mem])
|
||||||
|
2018/03/07 ver 5.64 Cpu()の中でzero divisionが出ることがあるのを修正
|
||||||
|
2018/02/14 ver 5.63 Cpu::setCacheHierarchy()の修正とclang<3.9のためのEvexModifierZero修正(thanks to mgouicem)
|
||||||
|
2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||||
|
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
|
||||||
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
|
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
|
||||||
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
|
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
|
||||||
2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加
|
2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加
|
||||||
|
@ -470,7 +473,3 @@ cybozulibは単体テストでのみ利用されていて、xbyak/ディレク
|
||||||
◎著作権者
|
◎著作権者
|
||||||
|
|
||||||
光成滋生(MITSUNARI Shigeo, herumi@nifty.com)
|
光成滋生(MITSUNARI Shigeo, herumi@nifty.com)
|
||||||
|
|
||||||
---
|
|
||||||
$Revision: 1.56 $
|
|
||||||
$Date: 2010/04/16 11:58:22 $
|
|
||||||
|
|
|
@ -13,7 +13,6 @@ struct Code : Xbyak::CodeGenerator {
|
||||||
{
|
{
|
||||||
puts("generate");
|
puts("generate");
|
||||||
printf("ptr=%p, %p\n", getCode(), buf);
|
printf("ptr=%p, %p\n", getCode(), buf);
|
||||||
Xbyak::CodeArray::protect(buf, sizeof(buf), true);
|
|
||||||
#ifdef XBYAK32
|
#ifdef XBYAK32
|
||||||
mov(eax, ptr [esp + 4]);
|
mov(eax, ptr [esp + 4]);
|
||||||
add(eax, ptr [esp + 8]);
|
add(eax, ptr [esp + 8]);
|
||||||
|
@ -23,6 +22,11 @@ struct Code : Xbyak::CodeGenerator {
|
||||||
lea(rax, ptr [rdi + rsi]);
|
lea(rax, ptr [rdi + rsi]);
|
||||||
#endif
|
#endif
|
||||||
ret();
|
ret();
|
||||||
|
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RE);
|
||||||
|
}
|
||||||
|
~Code()
|
||||||
|
{
|
||||||
|
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RW);
|
||||||
}
|
}
|
||||||
} s_code;
|
} s_code;
|
||||||
|
|
||||||
|
|
|
@ -162,18 +162,21 @@ int main()
|
||||||
{
|
{
|
||||||
// use memory allocated by user
|
// use memory allocated by user
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
const size_t codeSize = 1024;
|
const size_t codeSize = 4096;
|
||||||
uint8 buf[codeSize + 16];
|
uint8 buf[codeSize + 16];
|
||||||
uint8 *p = CodeArray::getAlignedAddress(buf);
|
uint8 *p = CodeArray::getAlignedAddress(buf);
|
||||||
CodeArray::protect(p, codeSize, true);
|
|
||||||
Sample s(p, codeSize);
|
Sample s(p, codeSize);
|
||||||
|
if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) {
|
||||||
|
fprintf(stderr, "can't protect\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||||
if (Xbyak::CastTo<uint8*>(func) != p) {
|
if (Xbyak::CastTo<uint8*>(func) != p) {
|
||||||
fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo<uint8*>(func));
|
fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo<uint8*>(func));
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
printf("0 + ... + %d = %d\n", 100, func(100));
|
printf("0 + ... + %d = %d\n", 100, func(100));
|
||||||
CodeArray::protect(p, codeSize, false);
|
CodeArray::protect(p, codeSize, CodeArray::PROTECT_RW);
|
||||||
}
|
}
|
||||||
puts("OK");
|
puts("OK");
|
||||||
testReset();
|
testReset();
|
||||||
|
|
|
@ -104,6 +104,9 @@ void putCPUinfo()
|
||||||
Core i7-3930K 6 2D
|
Core i7-3930K 6 2D
|
||||||
*/
|
*/
|
||||||
cpu.putFamily();
|
cpu.putFamily();
|
||||||
|
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
||||||
|
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
|
|
|
@ -37,6 +37,7 @@ test: normalize_prefix jmp bad_address $(TARGET)
|
||||||
$(MAKE) -C ../gen
|
$(MAKE) -C ../gen
|
||||||
./test_nm.sh
|
./test_nm.sh
|
||||||
./test_nm.sh Y
|
./test_nm.sh Y
|
||||||
|
./test_nm.sh avx512
|
||||||
./test_address.sh
|
./test_address.sh
|
||||||
./jmp
|
./jmp
|
||||||
./bad_address
|
./bad_address
|
||||||
|
|
|
@ -27,6 +27,8 @@ struct Code : Xbyak::CodeGenerator {
|
||||||
TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]));
|
TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]));
|
||||||
TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]));
|
TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]));
|
||||||
TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]));
|
TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]));
|
||||||
|
TEST_EXCEPTION(mov(eax, ptr [xmm0]));
|
||||||
|
TEST_EXCEPTION(fld(dword [xmm0]));
|
||||||
TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3));
|
TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3));
|
||||||
TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3));
|
TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3));
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
|
|
|
@ -1,27 +0,0 @@
|
||||||
|
|
||||||
Copyright (c) 2007-2012 Cybozu Labs, Inc.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
Redistributions of source code must retain the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer.
|
|
||||||
Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer in the documentation
|
|
||||||
and/or other materials provided with the distribution.
|
|
||||||
Neither the name of the Cybozu Labs, Inc. nor the names of its contributors may
|
|
||||||
be used to endorse or promote products derived from this software without
|
|
||||||
specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
||||||
THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
@ -2,10 +2,11 @@
|
||||||
/**
|
/**
|
||||||
@file
|
@file
|
||||||
@brief int type definition and macros
|
@brief int type definition and macros
|
||||||
Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
|
@author MITSUNARI Shigeo(@herumi)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(_MSC_VER) && (MSC_VER <= 1500)
|
#if defined(_MSC_VER) && (MSC_VER <= 1500) && !defined(CYBOZU_DEFINED_INTXX)
|
||||||
|
#define CYBOZU_DEFINED_INTXX
|
||||||
typedef __int64 int64_t;
|
typedef __int64 int64_t;
|
||||||
typedef unsigned __int64 uint64_t;
|
typedef unsigned __int64 uint64_t;
|
||||||
typedef unsigned int uint32_t;
|
typedef unsigned int uint32_t;
|
||||||
|
@ -38,27 +39,33 @@
|
||||||
#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
|
#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
// CYBOZU_FORCE_INLINE: compiler-specific "always inline" specifier.
// Expands to __forceinline when _MSC_VER is defined and to
// __attribute__((always_inline)) otherwise. The outer #ifndef lets a
// definition made before this point take precedence.
#ifndef CYBOZU_FORCE_INLINE
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define CYBOZU_FORCE_INLINE __forceinline
|
||||||
|
#else
|
||||||
|
#define CYBOZU_FORCE_INLINE __attribute__((always_inline))
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
// CYBOZU_UNUSED: marks an entity as intentionally unused.
// Expands to __attribute__((unused)) when __GNUC__ is defined and to
// nothing otherwise. The outer #ifndef lets a definition made before this
// point take precedence.
#ifndef CYBOZU_UNUSED
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define CYBOZU_UNUSED __attribute__((unused))
|
||||||
|
#else
|
||||||
|
#define CYBOZU_UNUSED
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#ifndef CYBOZU_ALLOCA
|
#ifndef CYBOZU_ALLOCA
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#define CYBOZU_ALLOCA(x) _malloca(x)
|
#define CYBOZU_ALLOCA(x) _malloca(x)
|
||||||
#else
|
#else
|
||||||
#define CYBOZU_ALLOCA_(x) __builtin_alloca(x)
|
#define CYBOZU_ALLOCA(x) __builtin_alloca(x)
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#ifndef CYBOZU_FOREACH
|
|
||||||
// std::vector<int> v; CYBOZU_FOREACH(auto x, v) {...}
|
|
||||||
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
|
|
||||||
#define CYBOZU_FOREACH(type_x, xs) for each (type_x in xs)
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#define CYBOZU_FOREACH(type_x, xs) for (type_x : xs)
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#ifndef CYBOZU_NUM_OF_ARRAY
|
#ifndef CYBOZU_NUM_OF_ARRAY
|
||||||
#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
|
#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
|
||||||
#endif
|
#endif
|
||||||
#ifndef CYBOZU_SNPRINTF
|
#ifndef CYBOZU_SNPRINTF
|
||||||
#ifdef _MSC_VER
|
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||||
#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
|
#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
|
||||||
#else
|
#else
|
||||||
#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
|
#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
|
||||||
|
@ -68,20 +75,36 @@
|
||||||
#define CYBOZU_CPP_VERSION_CPP03 0
|
#define CYBOZU_CPP_VERSION_CPP03 0
|
||||||
#define CYBOZU_CPP_VERSION_TR1 1
|
#define CYBOZU_CPP_VERSION_TR1 1
|
||||||
#define CYBOZU_CPP_VERSION_CPP11 2
|
#define CYBOZU_CPP_VERSION_CPP11 2
|
||||||
|
#define CYBOZU_CPP_VERSION_CPP14 3
|
||||||
|
#define CYBOZU_CPP_VERSION_CPP17 4
|
||||||
|
|
||||||
#if (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
|
// CYBOZU_GNUC_PREREQ(major, minor): usable in #if expressions; non-zero when
// __GNUC__ is defined and (__GNUC__, __GNUC_MINOR__) is at least
// (major, minor). Versions are compared as major * 100 + minor.
// Always 0 when __GNUC__ is not defined.
#ifdef __GNUC__
|
||||||
|
#define CYBOZU_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
|
||||||
|
#else
|
||||||
|
#define CYBOZU_GNUC_PREREQ(major, minor) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (__cplusplus >= 201703)
|
||||||
|
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP17
|
||||||
|
#elif (__cplusplus >= 201402)
|
||||||
|
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP14
|
||||||
|
#elif (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
|
||||||
#if defined(_MSC_VER) && (_MSC_VER <= 1600)
|
#if defined(_MSC_VER) && (_MSC_VER <= 1600)
|
||||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
|
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
|
||||||
#else
|
#else
|
||||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
|
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
|
||||||
#endif
|
#endif
|
||||||
#elif (__GNUC__ >= 4 && __GNUC_MINOR__ >= 5) || (__clang_major__ >= 3)
|
#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
|
||||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
|
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
|
||||||
#else
|
#else
|
||||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
|
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1)
|
#ifdef CYBOZU_USE_BOOST
|
||||||
|
#define CYBOZU_NAMESPACE_STD boost
|
||||||
|
#define CYBOZU_NAMESPACE_TR1_BEGIN
|
||||||
|
#define CYBOZU_NAMESPACE_TR1_END
|
||||||
|
#elif (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1) && !defined(__APPLE__)
|
||||||
#define CYBOZU_NAMESPACE_STD std::tr1
|
#define CYBOZU_NAMESPACE_STD std::tr1
|
||||||
#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
|
#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
|
||||||
#define CYBOZU_NAMESPACE_TR1_END }
|
#define CYBOZU_NAMESPACE_TR1_END }
|
||||||
|
@ -92,25 +115,44 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CYBOZU_OS_BIT
|
#ifndef CYBOZU_OS_BIT
|
||||||
#if defined(_WIN64) || defined(__x86_64__)
|
#if defined(_WIN64) || defined(__x86_64__) || defined(__AARCH64EL__) || defined(__EMSCRIPTEN__)
|
||||||
#define CYBOZU_OS_BIT 64
|
#define CYBOZU_OS_BIT 64
|
||||||
#else
|
#else
|
||||||
#define CYBOZU_OS_BIT 32
|
#define CYBOZU_OS_BIT 32
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// CYBOZU_HOST: host architecture selected from predefined compiler macros.
//   CYBOZU_HOST_INTEL   - _M_IX86, _M_AMD64, __x86_64__ or __i386__ is defined
//   CYBOZU_HOST_ARM     - __arm__ or __AARCH64EL__ is defined
//   CYBOZU_HOST_UNKNOWN - none of the above
// NOTE: when CYBOZU_HOST is already defined on entry, the whole block is
// skipped, so the CYBOZU_HOST_* constants are not defined here either.
#ifndef CYBOZU_HOST
|
||||||
|
#define CYBOZU_HOST_UNKNOWN 0
|
||||||
|
#define CYBOZU_HOST_INTEL 1
|
||||||
|
#define CYBOZU_HOST_ARM 2
|
||||||
|
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
|
||||||
|
#define CYBOZU_HOST CYBOZU_HOST_INTEL
|
||||||
|
#elif defined(__arm__) || defined(__AARCH64EL__)
|
||||||
|
#define CYBOZU_HOST CYBOZU_HOST_ARM
|
||||||
|
#else
|
||||||
|
#define CYBOZU_HOST CYBOZU_HOST_UNKNOWN
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef CYBOZU_ENDIAN
|
#ifndef CYBOZU_ENDIAN
|
||||||
#define CYBOZU_ENDIAN_UNKNOWN 0
|
#define CYBOZU_ENDIAN_UNKNOWN 0
|
||||||
#define CYBOZU_ENDIAN_LITTLE 1
|
#define CYBOZU_ENDIAN_LITTLE 1
|
||||||
#define CYBOZU_ENDIAN_BIG 2
|
#define CYBOZU_ENDIAN_BIG 2
|
||||||
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
|
#if (CYBOZU_HOST == CYBOZU_HOST_INTEL)
|
||||||
|
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
|
||||||
|
#elif (CYBOZU_HOST == CYBOZU_HOST_ARM) && (defined(__ARM_EABI__) || defined(__AARCH64EL__))
|
||||||
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
|
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
|
||||||
#else
|
#else
|
||||||
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
|
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// CYBOZU_NOEXCEPT: portable non-throwing specifier. Expands to `noexcept`
// when CYBOZU_CPP_VERSION is at least CYBOZU_CPP_VERSION_CPP11, and to the
// dynamic exception specification `throw()` otherwise.
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
|
||||||
|
#define CYBOZU_NOEXCEPT noexcept
|
||||||
|
#else
|
||||||
|
#define CYBOZU_NOEXCEPT throw()
|
||||||
|
#endif
|
||||||
namespace cybozu {
|
namespace cybozu {
|
||||||
template<class T>
|
template<class T>
|
||||||
void disable_warning_unused_variable(const T&) { }
|
void disable_warning_unused_variable(const T&) { }
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
@file
|
@file
|
||||||
@brief unit test class
|
@brief unit test class
|
||||||
|
|
||||||
Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
|
@author MITSUNARI Shigeo(@herumi)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
|
@ -1143,10 +1143,11 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
} code;
|
} code;
|
||||||
Xbyak::CodeArray::protect(p, 4096, true);
|
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RE);
|
||||||
code.getCode<void (*)()>()();
|
code.getCode<void (*)()>()();
|
||||||
CYBOZU_TEST_EQUAL(*x0, 123);
|
CYBOZU_TEST_EQUAL(*x0, 123);
|
||||||
CYBOZU_TEST_EQUAL(*x1, 456);
|
CYBOZU_TEST_EQUAL(*x1, 456);
|
||||||
CYBOZU_TEST_EQUAL(buf[8], 99);
|
CYBOZU_TEST_EQUAL(buf[8], 99);
|
||||||
|
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RW);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -840,9 +840,9 @@ public:
|
||||||
put(p.name, _YMM|YMM_KZ, _YMM|MEM);
|
put(p.name, _YMM|YMM_KZ, _YMM|MEM);
|
||||||
put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
|
put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
|
||||||
if (!p.M_X) continue;
|
if (!p.M_X) continue;
|
||||||
put(p.name, MEM, _XMM);
|
put(p.name, MEM|MEM_K, _XMM);
|
||||||
put(p.name, MEM, _YMM);
|
put(p.name, MEM|MEM_K, _YMM);
|
||||||
put(p.name, MEM, _ZMM);
|
put(p.name, MEM|MEM_K, _ZMM);
|
||||||
}
|
}
|
||||||
put("vsqrtpd", XMM_KZ, M_1to2 | _MEM);
|
put("vsqrtpd", XMM_KZ, M_1to2 | _MEM);
|
||||||
put("vsqrtpd", YMM_KZ, M_1to4 | _MEM);
|
put("vsqrtpd", YMM_KZ, M_1to4 | _MEM);
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#define XBYAK_NO_OP_NAMES
|
||||||
#include "xbyak/xbyak.h"
|
#include "xbyak/xbyak.h"
|
||||||
#include "xbyak/xbyak_bin2hex.h"
|
#include "xbyak/xbyak_bin2hex.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
@ -121,6 +122,15 @@ class Test {
|
||||||
void operator=(const Test&);
|
void operator=(const Test&);
|
||||||
const bool isXbyak_;
|
const bool isXbyak_;
|
||||||
int funcNum_;
|
int funcNum_;
|
||||||
|
/*
|
||||||
|
and_, or_, xor_, not_ => and, or, xor, not
|
||||||
|
*/
|
||||||
|
std::string removeUnderScore(std::string s) const
|
||||||
|
{
|
||||||
|
if (!isXbyak_ && s[s.size() - 1] == '_') s.resize(s.size() - 1);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
// check all op1, op2, op3
|
// check all op1, op2, op3
|
||||||
void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
|
void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
|
||||||
{
|
{
|
||||||
|
@ -448,6 +458,10 @@ class Test {
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
"cdqe",
|
"cdqe",
|
||||||
"cqo",
|
"cqo",
|
||||||
|
"cmpsq",
|
||||||
|
"movsq",
|
||||||
|
"scasq",
|
||||||
|
"stosq",
|
||||||
#else
|
#else
|
||||||
"aaa",
|
"aaa",
|
||||||
"aad",
|
"aad",
|
||||||
|
@ -476,6 +490,18 @@ class Test {
|
||||||
|
|
||||||
"lahf",
|
"lahf",
|
||||||
// "lock",
|
// "lock",
|
||||||
|
"cmpsb",
|
||||||
|
"cmpsw",
|
||||||
|
"cmpsd",
|
||||||
|
"movsb",
|
||||||
|
"movsw",
|
||||||
|
"movsd",
|
||||||
|
"scasb",
|
||||||
|
"scasw",
|
||||||
|
"scasd",
|
||||||
|
"stosb",
|
||||||
|
"stosw",
|
||||||
|
"stosd",
|
||||||
"nop",
|
"nop",
|
||||||
|
|
||||||
"sahf",
|
"sahf",
|
||||||
|
@ -951,15 +977,16 @@ class Test {
|
||||||
static const char tbl[][16] = {
|
static const char tbl[][16] = {
|
||||||
"adc",
|
"adc",
|
||||||
"add",
|
"add",
|
||||||
"and",
|
"and_",
|
||||||
"cmp",
|
"cmp",
|
||||||
"or",
|
"or_",
|
||||||
"sbb",
|
"sbb",
|
||||||
"sub",
|
"sub",
|
||||||
"xor",
|
"xor_",
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const char *p = tbl[i];
|
const std::string s = removeUnderScore(tbl[i]);
|
||||||
|
const char *p = s.c_str();
|
||||||
put(p, REG32, REG32|MEM);
|
put(p, REG32, REG32|MEM);
|
||||||
put(p, REG64, REG64|MEM);
|
put(p, REG64, REG64|MEM);
|
||||||
put(p, REG16, REG16|MEM);
|
put(p, REG16, REG16|MEM);
|
||||||
|
@ -1017,10 +1044,11 @@ class Test {
|
||||||
"imul",
|
"imul",
|
||||||
"mul",
|
"mul",
|
||||||
"neg",
|
"neg",
|
||||||
"not",
|
"not_",
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const char *p = tbl[i];
|
const std::string s = removeUnderScore(tbl[i]);
|
||||||
|
const char *p = s.c_str();
|
||||||
put(p, REG32e|REG16|REG8|REG8_3);
|
put(p, REG32e|REG16|REG8|REG8_3);
|
||||||
put(p, MEM32|MEM16|MEM8);
|
put(p, MEM32|MEM16|MEM8);
|
||||||
}
|
}
|
||||||
|
@ -1042,15 +1070,19 @@ class Test {
|
||||||
push word 2
|
push word 2
|
||||||
reduce 2-byte stack, so I can't support it
|
reduce 2-byte stack, so I can't support it
|
||||||
*/
|
*/
|
||||||
const char *p = "push";
|
|
||||||
put(p, REG16);
|
|
||||||
put(p, IMM8); // IMM16 decrease -2 from esp
|
|
||||||
put(p, MEM16);
|
|
||||||
|
|
||||||
|
put("push", IMM8|IMM32);
|
||||||
|
if (isXbyak_) {
|
||||||
|
puts("push(word, 1000);dump();");
|
||||||
|
} else {
|
||||||
|
puts("push word 1000");
|
||||||
|
}
|
||||||
|
|
||||||
|
put("push", REG16|MEM16);
|
||||||
put("pop", REG16|MEM16);
|
put("pop", REG16|MEM16);
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
put("push", REG64);
|
put("push", REG64|IMM32|MEM64);
|
||||||
put("pop", REG64);
|
put("pop", REG64|MEM64);
|
||||||
#else
|
#else
|
||||||
put("push", REG32|IMM32|MEM32);
|
put("push", REG32|IMM32|MEM32);
|
||||||
put("pop", REG32|MEM32);
|
put("pop", REG32|MEM32);
|
||||||
|
@ -2672,7 +2704,7 @@ public:
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const char *name = tbl[i];
|
const char *name = tbl[i];
|
||||||
put(name, MEM, ZMM);
|
put(name, MEM|MEM_K, ZMM|XMM|YMM);
|
||||||
put(name, ZMM, MEM);
|
put(name, ZMM, MEM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#define XBYAK_NO_OP_NAMES
|
||||||
#define XBYAK_ENABLE_OMITTED_OPERAND
|
#define XBYAK_ENABLE_OMITTED_OPERAND
|
||||||
#include "xbyak/xbyak.h"
|
#include "xbyak/xbyak.h"
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,7 @@ address %1% jit > nm.cpp
|
||||||
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||||
nm_frame > x.lst
|
nm_frame > x.lst
|
||||||
diff x.lst ok.lst
|
diff -w x.lst ok.lst
|
||||||
wc x.lst
|
wc x.lst
|
||||||
|
|
||||||
:end
|
:end
|
||||||
|
|
|
@ -38,5 +38,5 @@ make_nm jit > nm.cpp
|
||||||
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||||
nm_frame |%FILTER% > x.lst
|
nm_frame |%FILTER% > x.lst
|
||||||
diff x.lst ok.lst
|
diff -w x.lst ok.lst
|
||||||
wc x.lst
|
wc x.lst
|
||||||
|
|
|
@ -27,5 +27,5 @@ awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti
|
||||||
make_512 jit > nm.cpp
|
make_512 jit > nm.cpp
|
||||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% /DXBYAK_AVX512
|
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% /DXBYAK_AVX512
|
||||||
nm_frame |%FILTER% > x.lst
|
nm_frame |%FILTER% > x.lst
|
||||||
diff x.lst ok.lst
|
diff -w x.lst ok.lst
|
||||||
wc x.lst
|
wc x.lst
|
||||||
|
|
|
@ -39,5 +39,5 @@ if /i "%Y%"=="1" (
|
||||||
make_nm jit > nm.cpp
|
make_nm jit > nm.cpp
|
||||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||||
nm_frame |%FILTER% > x.lst
|
nm_frame |%FILTER% > x.lst
|
||||||
diff x.lst ok.lst
|
diff -w x.lst ok.lst
|
||||||
wc x.lst
|
wc x.lst
|
||||||
|
|
|
@ -19,6 +19,12 @@ else if ($1 == "Y64") then
|
||||||
set OPT2="-DUSE_YASM -DXBYAK64"
|
set OPT2="-DUSE_YASM -DXBYAK64"
|
||||||
set OPT3=win64
|
set OPT3=win64
|
||||||
set FILTER=./normalize_prefix
|
set FILTER=./normalize_prefix
|
||||||
|
else if ($1 == "avx512") then
|
||||||
|
echo "nasm(64bit) + avx512"
|
||||||
|
set EXE=nasm
|
||||||
|
set OPT2="-DXBYAK64 -DUSE_AVX512"
|
||||||
|
set OPT3=win64
|
||||||
|
set FILTER=./normalize_prefix
|
||||||
else
|
else
|
||||||
echo "nasm(32bit)"
|
echo "nasm(32bit)"
|
||||||
set EXE=nasm
|
set EXE=nasm
|
||||||
|
|
|
@ -105,7 +105,7 @@ namespace Xbyak {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x5601 /* 0xABCD = A.BC(D) */
|
VERSION = 0x5670 /* 0xABCD = A.BC(D) */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
|
@ -566,7 +566,7 @@ struct EvexModifierRounding {
|
||||||
explicit EvexModifierRounding(int rounding) : rounding(rounding) {}
|
explicit EvexModifierRounding(int rounding) : rounding(rounding) {}
|
||||||
int rounding;
|
int rounding;
|
||||||
};
|
};
|
||||||
struct EvexModifierZero{};
|
struct EvexModifierZero{EvexModifierZero() {}};
|
||||||
|
|
||||||
struct Xmm : public Mmx {
|
struct Xmm : public Mmx {
|
||||||
explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
|
explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
|
||||||
|
@ -614,16 +614,16 @@ struct Reg64 : public Reg32e {
|
||||||
};
|
};
|
||||||
struct RegRip {
|
struct RegRip {
|
||||||
sint64 disp_;
|
sint64 disp_;
|
||||||
Label* label_;
|
const Label* label_;
|
||||||
bool isAddr_;
|
bool isAddr_;
|
||||||
explicit RegRip(sint64 disp = 0, Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
|
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
|
||||||
friend const RegRip operator+(const RegRip& r, sint64 disp) {
|
friend const RegRip operator+(const RegRip& r, sint64 disp) {
|
||||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||||
}
|
}
|
||||||
friend const RegRip operator-(const RegRip& r, sint64 disp) {
|
friend const RegRip operator-(const RegRip& r, sint64 disp) {
|
||||||
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
|
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
|
||||||
}
|
}
|
||||||
friend const RegRip operator+(const RegRip& r, Label& label) {
|
friend const RegRip operator+(const RegRip& r, const Label& label) {
|
||||||
if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING);
|
if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING);
|
||||||
return RegRip(r.disp_, &label);
|
return RegRip(r.disp_, &label);
|
||||||
}
|
}
|
||||||
|
@ -848,10 +848,15 @@ protected:
|
||||||
uint64 disp = i->getVal(top_);
|
uint64 disp = i->getVal(top_);
|
||||||
rewrite(i->codeOffset, disp, i->jmpSize);
|
rewrite(i->codeOffset, disp, i->jmpSize);
|
||||||
}
|
}
|
||||||
if (alloc_->useProtect() && !protect(top_, size_, true)) throw Error(ERR_CANT_PROTECT);
|
if (alloc_->useProtect() && !protect(top_, size_, PROTECT_RWE)) throw Error(ERR_CANT_PROTECT);
|
||||||
isCalledCalcJmpAddress_ = true;
|
isCalledCalcJmpAddress_ = true;
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
|
// Page protection modes accepted by protect(); protect() returns false for any other value.
// The constructor and calcJmpAddress() request PROTECT_RWE, the destructor restores PROTECT_RW.
enum ProtectMode {
|
||||||
|
PROTECT_RW = 0, // read/write (PAGE_READWRITE on Windows, PROT_READ | PROT_WRITE otherwise)
|
||||||
|
PROTECT_RWE = 1, // read/write/exec (PAGE_EXECUTE_READWRITE, or PROT_READ | PROT_WRITE | PROT_EXEC)
|
||||||
|
PROTECT_RE = 2 // read/exec (PAGE_EXECUTE_READ, or PROT_READ | PROT_EXEC)
|
||||||
|
};
|
||||||
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
|
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
|
||||||
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
|
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
|
||||||
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
|
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
|
||||||
|
@ -861,7 +866,7 @@ public:
|
||||||
, isCalledCalcJmpAddress_(false)
|
, isCalledCalcJmpAddress_(false)
|
||||||
{
|
{
|
||||||
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
|
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
|
||||||
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) {
|
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, PROTECT_RWE)) {
|
||||||
alloc_->free(top_);
|
alloc_->free(top_);
|
||||||
throw Error(ERR_CANT_PROTECT);
|
throw Error(ERR_CANT_PROTECT);
|
||||||
}
|
}
|
||||||
|
@ -869,7 +874,7 @@ public:
|
||||||
virtual ~CodeArray()
|
virtual ~CodeArray()
|
||||||
{
|
{
|
||||||
if (isAllocType()) {
|
if (isAllocType()) {
|
||||||
if (alloc_->useProtect()) protect(top_, maxSize_, false);
|
if (alloc_->useProtect()) protect(top_, maxSize_, PROTECT_RW);
|
||||||
alloc_->free(top_);
|
alloc_->free(top_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -960,19 +965,36 @@ public:
|
||||||
change exec permission of memory
|
change exec permission of memory
|
||||||
@param addr [in] buffer address
|
@param addr [in] buffer address
|
||||||
@param size [in] buffer size
|
@param size [in] buffer size
|
||||||
@param canExec [in] true(enable to exec), false(disable to exec)
|
@param protectMode [in] protection mode (PROTECT_RW, PROTECT_RWE or PROTECT_RE)
|
||||||
@return true(success), false(failure)
|
@return true(success), false(failure)
|
||||||
*/
|
*/
|
||||||
static inline bool protect(const void *addr, size_t size, bool canExec)
|
static inline bool protect(const void *addr, size_t size, int protectMode)
|
||||||
{
|
{
|
||||||
|
#if defined(_WIN32)
|
||||||
|
const DWORD c_rw = PAGE_READWRITE;
|
||||||
|
const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
|
||||||
|
const DWORD c_re = PAGE_EXECUTE_READ;
|
||||||
|
DWORD mode;
|
||||||
|
#else
|
||||||
|
const int c_rw = PROT_READ | PROT_WRITE;
|
||||||
|
const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
|
||||||
|
const int c_re = PROT_READ | PROT_EXEC;
|
||||||
|
int mode;
|
||||||
|
#endif
|
||||||
|
switch (protectMode) {
|
||||||
|
case PROTECT_RW: mode = c_rw; break;
|
||||||
|
case PROTECT_RWE: mode = c_rwe; break;
|
||||||
|
case PROTECT_RE: mode = c_re; break;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
DWORD oldProtect;
|
DWORD oldProtect;
|
||||||
return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
|
return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
size_t pageSize = sysconf(_SC_PAGESIZE);
|
size_t pageSize = sysconf(_SC_PAGESIZE);
|
||||||
size_t iaddr = reinterpret_cast<size_t>(addr);
|
size_t iaddr = reinterpret_cast<size_t>(addr);
|
||||||
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
||||||
int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0);
|
|
||||||
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
||||||
#else
|
#else
|
||||||
return true;
|
return true;
|
||||||
|
@ -999,46 +1021,43 @@ public:
|
||||||
M_ripAddr
|
M_ripAddr
|
||||||
};
|
};
|
||||||
Address(uint32 sizeBit, bool broadcast, const RegExp& e)
|
Address(uint32 sizeBit, bool broadcast, const RegExp& e)
|
||||||
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(broadcast)
|
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
|
||||||
{
|
{
|
||||||
e_.verify();
|
e_.verify();
|
||||||
}
|
}
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
explicit Address(size_t disp)
|
explicit Address(size_t disp)
|
||||||
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false){ }
|
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
|
||||||
Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
|
Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
|
||||||
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), permitVsib_(false), broadcast_(broadcast) { }
|
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
|
||||||
#endif
|
#endif
|
||||||
void permitVsib() const { permitVsib_ = true; }
|
|
||||||
RegExp getRegExp(bool optimize = true) const
|
RegExp getRegExp(bool optimize = true) const
|
||||||
{
|
{
|
||||||
return optimize ? e_.optimize() : e_;
|
return optimize ? e_.optimize() : e_;
|
||||||
}
|
}
|
||||||
Mode getMode() const { return mode_; }
|
Mode getMode() const { return mode_; }
|
||||||
bool is32bit() const { verify(); return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
|
bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
|
||||||
bool isOnlyDisp() const { verify(); return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
|
bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
|
||||||
size_t getDisp() const { verify(); return e_.getDisp(); }
|
size_t getDisp() const { return e_.getDisp(); }
|
||||||
uint8 getRex() const
|
uint8 getRex() const
|
||||||
{
|
{
|
||||||
verify();
|
|
||||||
if (mode_ != M_ModRM) return 0;
|
if (mode_ != M_ModRM) return 0;
|
||||||
return getRegExp().getRex();
|
return getRegExp().getRex();
|
||||||
}
|
}
|
||||||
bool is64bitDisp() const { verify(); return mode_ == M_64bitDisp; } // for moffset
|
bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
|
||||||
bool isBroadcast() const { return broadcast_; }
|
bool isBroadcast() const { return broadcast_; }
|
||||||
const Label* getLabel() const { return label_; }
|
const Label* getLabel() const { return label_; }
|
||||||
bool operator==(const Address& rhs) const
|
bool operator==(const Address& rhs) const
|
||||||
{
|
{
|
||||||
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && permitVsib_ == rhs.permitVsib_ && broadcast_ == rhs.broadcast_;
|
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
|
||||||
}
|
}
|
||||||
bool operator!=(const Address& rhs) const { return !operator==(rhs); }
|
bool operator!=(const Address& rhs) const { return !operator==(rhs); }
|
||||||
|
bool isVsib() const { return e_.isVsib(); }
|
||||||
private:
|
private:
|
||||||
RegExp e_;
|
RegExp e_;
|
||||||
const Label* label_;
|
const Label* label_;
|
||||||
Mode mode_;
|
Mode mode_;
|
||||||
mutable bool permitVsib_;
|
|
||||||
bool broadcast_;
|
bool broadcast_;
|
||||||
void verify() const { if (e_.isVsib() && !permitVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
inline const Address& Operand::getAddress() const
|
inline const Address& Operand::getAddress() const
|
||||||
|
@ -1443,6 +1462,7 @@ private:
|
||||||
T_B32 = 1 << 26, // m32bcst
|
T_B32 = 1 << 26, // m32bcst
|
||||||
T_B64 = 1 << 27, // m64bcst
|
T_B64 = 1 << 27, // m64bcst
|
||||||
T_M_K = 1 << 28, // mem{k}
|
T_M_K = 1 << 28, // mem{k}
|
||||||
|
T_VSIB = 1 << 29,
|
||||||
T_XXX
|
T_XXX
|
||||||
};
|
};
|
||||||
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
||||||
|
@ -1669,8 +1689,9 @@ private:
|
||||||
// reg is reg field of ModRM
|
// reg is reg field of ModRM
|
||||||
// immSize is the size for immediate value
|
// immSize is the size for immediate value
|
||||||
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
|
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
|
||||||
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0)
|
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
|
||||||
{
|
{
|
||||||
|
if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||||
if (addr.getMode() == Address::M_ModRM) {
|
if (addr.getMode() == Address::M_ModRM) {
|
||||||
setSIB(addr.getRegExp(), reg, disp8N);
|
setSIB(addr.getRegExp(), reg, disp8N);
|
||||||
} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
|
} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
|
||||||
|
@ -1812,15 +1833,20 @@ private:
|
||||||
}
|
}
|
||||||
void opPushPop(const Operand& op, int code, int ext, int alt)
|
void opPushPop(const Operand& op, int code, int ext, int alt)
|
||||||
{
|
{
|
||||||
if (op.isREG()) {
|
int bit = op.getBit();
|
||||||
if (op.isBit(16)) db(0x66);
|
if (bit == 16 || bit == BIT) {
|
||||||
if (op.getReg().getIdx() >= 8) db(0x41);
|
if (bit == 16) db(0x66);
|
||||||
db(alt | (op.getIdx() & 7));
|
if (op.isREG()) {
|
||||||
} else if (op.isMEM()) {
|
if (op.getReg().getIdx() >= 8) db(0x41);
|
||||||
opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
|
db(alt | (op.getIdx() & 7));
|
||||||
} else {
|
return;
|
||||||
throw Error(ERR_BAD_COMBINATION);
|
}
|
||||||
|
if (op.isMEM()) {
|
||||||
|
opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
throw Error(ERR_BAD_COMBINATION);
|
||||||
}
|
}
|
||||||
void verifyMemHasSize(const Operand& op) const
|
void verifyMemHasSize(const Operand& op) const
|
||||||
{
|
{
|
||||||
|
@ -1925,7 +1951,7 @@ private:
|
||||||
} else {
|
} else {
|
||||||
vex(r, base, p1, type, code, x);
|
vex(r, base, p1, type, code, x);
|
||||||
}
|
}
|
||||||
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N);
|
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
|
||||||
} else {
|
} else {
|
||||||
const Reg& base = op2.getReg();
|
const Reg& base = op2.getReg();
|
||||||
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
|
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
|
||||||
|
@ -2026,8 +2052,7 @@ private:
|
||||||
}
|
}
|
||||||
if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||||
}
|
}
|
||||||
addr.permitVsib();
|
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
|
||||||
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type | T_YMM, code);
|
|
||||||
}
|
}
|
||||||
enum {
|
enum {
|
||||||
xx_yy_zz = 0,
|
xx_yy_zz = 0,
|
||||||
|
@ -2051,7 +2076,6 @@ private:
|
||||||
{
|
{
|
||||||
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
|
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
|
||||||
checkGather2(x, addr.getRegExp().getIndex(), mode);
|
checkGather2(x, addr.getRegExp().getIndex(), mode);
|
||||||
addr.permitVsib();
|
|
||||||
opVex(x, 0, addr, type, code);
|
opVex(x, 0, addr, type, code);
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
@ -2071,7 +2095,6 @@ private:
|
||||||
{
|
{
|
||||||
if (addr.hasZero()) throw Error(ERR_INVALID_ZERO);
|
if (addr.hasZero()) throw Error(ERR_INVALID_ZERO);
|
||||||
if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||||
addr.permitVsib();
|
|
||||||
opVex(x, 0, addr, type, code);
|
opVex(x, 0, addr, type, code);
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
const char *getVersionString() const { return "5.601"; }
|
const char *getVersionString() const { return "5.67"; }
|
||||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||||
|
@ -122,8 +122,11 @@ void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); }
|
||||||
void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
|
void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
|
||||||
void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
|
void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
|
||||||
void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
|
void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
|
||||||
|
void cmpsb() { db(0xA6); }
|
||||||
|
void cmpsd() { db(0xA7); }
|
||||||
void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
|
void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
|
||||||
void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
|
void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
|
||||||
|
void cmpsw() { db(0x66); db(0xA7); }
|
||||||
void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
|
void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
|
||||||
void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
|
void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
|
||||||
void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); }
|
void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); }
|
||||||
|
@ -683,6 +686,9 @@ void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
|
||||||
void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
|
void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
|
||||||
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
|
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
|
||||||
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
|
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
|
||||||
|
void scasb() { db(0xAE); }
|
||||||
|
void scasd() { db(0xAF); }
|
||||||
|
void scasw() { db(0x66); db(0xAF); }
|
||||||
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
|
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
|
||||||
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
|
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
|
||||||
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
|
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
|
||||||
|
@ -742,6 +748,9 @@ void stc() { db(0xF9); }
|
||||||
void std() { db(0xFD); }
|
void std() { db(0xFD); }
|
||||||
void sti() { db(0xFB); }
|
void sti() { db(0xFB); }
|
||||||
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
|
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
|
||||||
|
void stosb() { db(0xAA); }
|
||||||
|
void stosd() { db(0xAB); }
|
||||||
|
void stosw() { db(0x66); db(0xAB); }
|
||||||
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
|
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
|
||||||
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
|
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
|
||||||
void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
|
void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
|
||||||
|
@ -1001,10 +1010,10 @@ void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_X
|
||||||
void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE); }
|
void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE); }
|
||||||
void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF); }
|
void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF); }
|
||||||
void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF); }
|
void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF); }
|
||||||
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x92, 0); }
|
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); }
|
||||||
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x92, 1); }
|
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); }
|
||||||
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x93, 1); }
|
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); }
|
||||||
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); }
|
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); }
|
||||||
void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
|
void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
|
||||||
void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
|
void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
|
||||||
void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
|
void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
|
||||||
|
@ -1030,9 +1039,9 @@ void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
||||||
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
|
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
|
||||||
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5D); }
|
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5D); }
|
||||||
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5D); }
|
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5D); }
|
||||||
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x29); }
|
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x29); }
|
||||||
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
|
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
|
||||||
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x29); }
|
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); }
|
||||||
void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); }
|
void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); }
|
||||||
void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }
|
void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }
|
||||||
void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }
|
void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }
|
||||||
|
@ -1068,9 +1077,9 @@ void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 |
|
||||||
void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_M_K, 0x11); }
|
void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_M_K, 0x11); }
|
||||||
void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
|
void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
|
||||||
void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
|
void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
|
||||||
void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x11); }
|
void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x11); }
|
||||||
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); }
|
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); }
|
||||||
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x11); }
|
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x11); }
|
||||||
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
|
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
|
||||||
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
|
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
|
||||||
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
|
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
|
||||||
|
@ -1135,10 +1144,10 @@ void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|
|
||||||
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
|
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
|
||||||
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
|
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
|
||||||
void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
|
void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
|
||||||
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x90, 1); }
|
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); }
|
||||||
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x90, 0); }
|
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
|
||||||
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x91, 2); }
|
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
|
||||||
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x91, 1); }
|
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); }
|
||||||
void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); }
|
void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); }
|
||||||
void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); }
|
void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); }
|
||||||
void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); }
|
void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); }
|
||||||
|
@ -1544,7 +1553,10 @@ void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
||||||
void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
||||||
void cdqe() { db(0x48); db(0x98); }
|
void cdqe() { db(0x48); db(0x98); }
|
||||||
void cqo() { db(0x48); db(0x99); }
|
void cqo() { db(0x48); db(0x99); }
|
||||||
|
void cmpsq() { db(0x48); db(0xA7); }
|
||||||
void movsq() { db(0x48); db(0xA5); }
|
void movsq() { db(0x48); db(0xA5); }
|
||||||
|
void scasq() { db(0x48); db(0xAF); }
|
||||||
|
void stosq() { db(0x48); db(0xAB); }
|
||||||
void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
|
void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
|
||||||
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
|
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
|
||||||
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
|
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
|
||||||
|
@ -1717,18 +1729,18 @@ void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(1
|
||||||
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
||||||
void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
||||||
void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
|
void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
|
||||||
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x92, 1); }
|
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
|
||||||
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x92, 0); }
|
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
|
||||||
void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
|
void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
|
||||||
void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
|
void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
|
||||||
void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
|
void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||||
void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
|
void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||||
void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
|
void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
|
||||||
void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
|
void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
|
||||||
void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
|
void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||||
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
|
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x93, 0); }
|
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
|
||||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x93, 2); }
|
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
|
||||||
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
|
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
|
||||||
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
|
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
|
||||||
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
||||||
|
@ -1745,17 +1757,17 @@ void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {i
|
||||||
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
||||||
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
|
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
|
||||||
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
||||||
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
|
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
|
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
|
void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
|
void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
|
void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
|
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
|
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
|
||||||
void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
|
void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
|
||||||
|
@ -1815,10 +1827,10 @@ void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T
|
||||||
void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
|
void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
|
||||||
void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
|
void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
|
||||||
void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
|
void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
|
||||||
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x90, 0); }
|
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 0); }
|
||||||
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x90, 1); }
|
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 1); }
|
||||||
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x91, 2); }
|
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 2); }
|
||||||
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x91, 0); }
|
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
|
||||||
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
|
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
|
||||||
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
|
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
|
||||||
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
|
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
|
||||||
|
@ -1869,10 +1881,10 @@ void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.get
|
||||||
void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
|
void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
|
||||||
void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
|
void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
|
||||||
void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
|
void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
|
||||||
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 0); }
|
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); }
|
||||||
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 1); }
|
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); }
|
||||||
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 2); }
|
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); }
|
||||||
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 0); }
|
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); }
|
||||||
void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
|
void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
|
||||||
void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
|
void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
|
||||||
void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
|
void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
|
||||||
|
@ -1936,18 +1948,18 @@ void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x
|
||||||
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
|
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
|
||||||
void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||||
void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||||
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 1); }
|
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
|
||||||
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 0); }
|
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
|
||||||
void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
|
void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
|
||||||
void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
|
void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
|
||||||
void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
|
void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||||
void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
|
void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||||
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
|
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
|
||||||
void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
|
void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
|
||||||
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
|
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||||
// vscatterpf1qps mnemonic: hands addr to opGatherFetch together with zm6,
// the type flags assembled below, byte 0xC7 and Operand::ZMM.
void vscatterpf1qps(const Address& addr)
{
	const int typeFlags = T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K;
	opGatherFetch(addr, zm6, typeFlags, 0xC7, Operand::ZMM);
}
|
// vscatterpf1qps mnemonic: hands addr to opGatherFetch together with zm6,
// the type flags assembled below (including T_VSIB), byte 0xC7 and Operand::ZMM.
void vscatterpf1qps(const Address& addr)
{
	const int typeFlags = T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB;
	opGatherFetch(addr, zm6, typeFlags, 0xC7, Operand::ZMM);
}
|
||||||
// vscatterqpd mnemonic: forwards the register x and the memory operand addr to
// opGather2 with the type flags assembled below, byte 0xA3 and mode 0.
void vscatterqpd(const Address& addr, const Xmm& x)
{
	const int typeFlags = T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K;
	opGather2(x, addr, typeFlags, 0xA3, 0);
}
|
// vscatterqpd mnemonic: forwards the register x and the memory operand addr to
// opGather2 with the type flags assembled below (including T_VSIB), byte 0xA3 and mode 0.
void vscatterqpd(const Address& addr, const Xmm& x)
{
	const int typeFlags = T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB;
	opGather2(x, addr, typeFlags, 0xA3, 0);
}
|
||||||
// vscatterqps mnemonic: forwards the register x and the memory operand addr to
// opGather2 with the type flags assembled below, byte 0xA3 and mode 2.
void vscatterqps(const Address& addr, const Xmm& x)
{
	const int typeFlags = T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K;
	opGather2(x, addr, typeFlags, 0xA3, 2);
}
|
// vscatterqps mnemonic: forwards the register x and the memory operand addr to
// opGather2 with the type flags assembled below (including T_VSIB), byte 0xA3 and mode 2.
void vscatterqps(const Address& addr, const Xmm& x)
{
	const int typeFlags = T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB;
	opGather2(x, addr, typeFlags, 0xA3, 2);
}
|
||||||
// vshuff32x4 mnemonic: forwards y1, y2, op and the immediate to opAVX_X_X_XM
// with the type flags assembled below and byte 0x23.
void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm)
{
	const int typeFlags = T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32;
	opAVX_X_X_XM(y1, y2, op, typeFlags, 0x23, imm);
}
|
// vshuff32x4 mnemonic: forwards y1, y2, op and the immediate to opAVX_X_X_XM
// with the type flags assembled below and byte 0x23.
void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm)
{
	const int typeFlags = T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32;
	opAVX_X_X_XM(y1, y2, op, typeFlags, 0x23, imm);
}
|
||||||
// vshuff64x2 mnemonic: forwards y1, y2, op and the immediate to opAVX_X_X_XM
// with the type flags assembled below and byte 0x23.
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm)
{
	const int typeFlags = T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64;
	opAVX_X_X_XM(y1, y2, op, typeFlags, 0x23, imm);
}
|
// vshuff64x2 mnemonic: forwards y1, y2, op and the immediate to opAVX_X_X_XM
// with the type flags assembled below and byte 0x23.
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm)
{
	const int typeFlags = T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64;
	opAVX_X_X_XM(y1, y2, op, typeFlags, 0x23, imm);
}
|
||||||
// vshufi32x4 mnemonic: forwards y1, y2, op and the immediate to opAVX_X_X_XM
// with the type flags assembled below and byte 0x43.
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm)
{
	const int typeFlags = T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32;
	opAVX_X_X_XM(y1, y2, op, typeFlags, 0x43, imm);
}
|
// vshufi32x4 mnemonic: forwards y1, y2, op and the immediate to opAVX_X_X_XM
// with the type flags assembled below and byte 0x43.
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm)
{
	const int typeFlags = T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32;
	opAVX_X_X_XM(y1, y2, op, typeFlags, 0x43, imm);
}
|
||||||
|
|
|
@ -84,6 +84,67 @@ class Cpu {
|
||||||
displayModel = model;
|
displayModel = model;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*
	Return the bit field val[end:base], where base and end are both
	inclusive bit positions (bit 0 is the least significant bit).

	setCacheHierarchy() passes register field boundaries in the inclusive
	form (e.g. 14 and 25 for "bits 25-14"), so bit `end` has to be part of
	the result. The former mask, (1u << (end - base)) - 1, was one bit too
	narrow and silently dropped the top bit of every field.

	Returns 0 when the field is empty or starts beyond bit 31
	(end < base or base > 31). `end` is clamped to 31, and a full-width
	field uses an all-ones mask so that no shift by 32 or more is performed
	(assumes a 32-bit unsigned int).
*/
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
{
	if (base > 31 || end < base) return 0;
	if (end > 31) end = 31;
	const unsigned int width = end - base + 1; // 1..32
	const unsigned int mask = (width >= 32) ? ~0u : ((1u << width) - 1);
	return (val >> base) & mask;
}
|
||||||
|
void setCacheHierarchy()
|
||||||
|
{
|
||||||
|
if ((type_ & tINTEL) == 0) return;
|
||||||
|
const unsigned int NO_CACHE = 0;
|
||||||
|
const unsigned int DATA_CACHE = 1;
|
||||||
|
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||||
|
const unsigned int UNIFIED_CACHE = 3;
|
||||||
|
unsigned int smt_width = 0;
|
||||||
|
unsigned int n_cores = 0;
|
||||||
|
unsigned int data[4];
|
||||||
|
|
||||||
|
/*
|
||||||
|
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
|
||||||
|
If x2APIC is supported, these are the only correct numbers.
|
||||||
|
|
||||||
|
leaf 0xB can be zeroed-out by a hypervisor
|
||||||
|
*/
|
||||||
|
getCpuidEx(0x0, 0, data);
|
||||||
|
if (data[0] >= 0xB) {
|
||||||
|
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
|
||||||
|
smt_width = data[1] & 0x7FFF;
|
||||||
|
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
|
||||||
|
n_cores = data[1] & 0x7FFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Assumptions:
|
||||||
|
the first level of data cache is not shared (which is the
|
||||||
|
case for every existing architecture) and use this to
|
||||||
|
determine the SMT width for arch not supporting leaf 11.
|
||||||
|
when leaf 4 reports a number of core less than n_cores
|
||||||
|
on socket reported by leaf 11, then it is a correct number
|
||||||
|
of cores not an upperbound.
|
||||||
|
*/
|
||||||
|
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
|
||||||
|
getCpuidEx(0x4, i, data);
|
||||||
|
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||||
|
if (cacheType == NO_CACHE) break;
|
||||||
|
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||||
|
unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||||
|
if (n_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||||
|
nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
|
||||||
|
}
|
||||||
|
assert(nb_logical_cores != 0);
|
||||||
|
data_cache_size[data_cache_levels] =
|
||||||
|
(extractBit(data[1], 22, 31) + 1)
|
||||||
|
* (extractBit(data[1], 12, 21) + 1)
|
||||||
|
* (extractBit(data[1], 0, 11) + 1)
|
||||||
|
* (data[2] + 1);
|
||||||
|
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
|
||||||
|
assert(smt_width != 0);
|
||||||
|
cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u);
|
||||||
|
data_cache_levels++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
int model;
|
int model;
|
||||||
int family;
|
int family;
|
||||||
|
@ -92,6 +153,25 @@ public:
|
||||||
int extFamily;
|
int extFamily;
|
||||||
int displayFamily; // family + extFamily
|
int displayFamily; // family + extFamily
|
||||||
int displayModel; // model + extModel
|
int displayModel; // model + extModel
|
||||||
|
|
||||||
|
// TODO: consider making these members private; read access is available through the getters below.
|
||||||
|
static const unsigned int maxNumberCacheLevels = 10;
|
||||||
|
unsigned int data_cache_size[maxNumberCacheLevels];
|
||||||
|
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
|
||||||
|
unsigned int data_cache_levels;
|
||||||
|
|
||||||
|
// Number of entries currently recorded in data_cache_size[] / cores_sharing_data_cache[].
unsigned int getDataCacheLevels() const
{
	return data_cache_levels;
}
|
||||||
|
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||||
|
{
|
||||||
|
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||||
|
return cores_sharing_data_cache[i];
|
||||||
|
}
|
||||||
|
unsigned int getDataCacheSize(unsigned int i) const
|
||||||
|
{
|
||||||
|
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||||
|
return data_cache_size[i];
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
data[] = { eax, ebx, ecx, edx }
|
data[] = { eax, ebx, ecx, edx }
|
||||||
*/
|
*/
|
||||||
|
@ -124,6 +204,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
typedef uint64 Type;
|
typedef uint64 Type;
|
||||||
|
|
||||||
static const Type NONE = 0;
|
static const Type NONE = 0;
|
||||||
static const Type tMMX = 1 << 0;
|
static const Type tMMX = 1 << 0;
|
||||||
static const Type tMMX2 = 1 << 1;
|
static const Type tMMX2 = 1 << 1;
|
||||||
|
@ -190,6 +271,7 @@ public:
|
||||||
|
|
||||||
Cpu()
|
Cpu()
|
||||||
: type_(NONE)
|
: type_(NONE)
|
||||||
|
, data_cache_levels(0)
|
||||||
{
|
{
|
||||||
unsigned int data[4];
|
unsigned int data[4];
|
||||||
const unsigned int& EAX = data[0];
|
const unsigned int& EAX = data[0];
|
||||||
|
@ -281,6 +363,7 @@ public:
|
||||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||||
}
|
}
|
||||||
setFamily();
|
setFamily();
|
||||||
|
setCacheHierarchy();
|
||||||
}
|
}
|
||||||
void putFamily() const
|
void putFamily() const
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue