Squashed 'externals/xbyak/' changes from 2794cde7..671fc805

671fc805 update test/cybozu
8ca86231 remove mutable in Address
8b93498f add cmpsb/scasb/...
7eb62750 avoid core_sharing_data_cache = 0 for some cloud environment
85767e95 support mingw64
59573e6e add PROTECT_RE mode for protect()
71b75f65 fix push(qword[mem])
811f4959 Merge branch 'rsdubtso-master'
8e3cb711 Account for potentially zero 0xb leaf when parsing cache/topology via cpuid
a816249f update version
fe083912 fix to avoid zero division for some virtual machine
f0a8f7fa update version
cac09b7a Merge pull request #62 from mgouicem/master
1f96b5e0 Fixes an error raised by clang < 3.9
c0f885ac Merge pull request #61 from mgouicem/master
bfe2d201 Change default value for n_cores in setCacheHierarchy.
fd587b55 change format and add getter for data_cache_size
80b3c7b9 remove macro
88189609 Merge branch 'mgouicem-master'
e6b79723 Adding queries to get the cpu topology on Intel architectures.
221384f0 vmov* supports [mem]|k|z
c04141ef define XBYAK_NO_OP_NAMES for test
af7f05ee add const for Label

git-subtree-dir: externals/xbyak
git-subtree-split: 671fc805d09d075f48d4625f183ef2e1ef725106
This commit is contained in:
MerryMage 2020-04-22 20:51:10 +01:00
parent 9fb82036ca
commit dbb1f8cf37
25 changed files with 425 additions and 204 deletions

View file

@ -36,6 +36,7 @@
T_B32 = 1 << 26, // m32bcst
T_B64 = 1 << 27, // m64bcst
T_M_K = 1 << 28, // mem{k}
T_VSIB = 1 << 29,
T_XXX
};
@ -156,5 +157,9 @@ std::string type2String(int type)
if (!str.empty()) str += " | ";
str += "T_M_K";
}
if (type & T_VSIB) {
if (!str.empty()) str += " | ";
str += "T_VSIB";
}
return str;
}

View file

@ -202,12 +202,12 @@ void putM_X()
const char *name;
int type;
} tbl[] = {
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@ -533,7 +533,7 @@ void putGather()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
std::string type = type2String(p.type | T_VSIB);
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
}
}
@ -557,7 +557,7 @@ void putScatter()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
std::string type = type2String(p.type | T_VSIB);
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
}
}
@ -669,26 +669,41 @@ void putMisc()
{
puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }");
puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }");
{
const struct Tbl {
const char *name;
int zm;
int type;
uint8 code;
bool isZmm;
} tbl[] = {
{ "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true },
{ "vgatherpf0qps", 1, T_EW0 | T_N4, 0xC7, true },
{ "vgatherpf0dpd", 1, T_EW1 | T_N8, 0xC6, false },
{ "vgatherpf0qpd", 1, T_EW1 | T_N8, 0xC7, true },
puts("void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }");
puts("void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }");
puts("void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }");
puts("void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }");
{ "vgatherpf1dps", 2, T_EW0 | T_N4, 0xC6, true },
{ "vgatherpf1qps", 2, T_EW0 | T_N4, 0xC7, true },
{ "vgatherpf1dpd", 2, T_EW1 | T_N8, 0xC6, false },
{ "vgatherpf1qpd", 2, T_EW1 | T_N8, 0xC7, true },
puts("void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }");
puts("void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }");
puts("void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }");
puts("void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }");
{ "vscatterpf0dps", 5, T_EW0 | T_N4, 0xC6, true },
{ "vscatterpf0qps", 5, T_EW0 | T_N4, 0xC7, true },
{ "vscatterpf0dpd", 5, T_EW1 | T_N8, 0xC6, false },
{ "vscatterpf0qpd", 5, T_EW1 | T_N8, 0xC7, true },
puts("void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }");
puts("void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }");
puts("void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }");
puts("void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }");
puts("void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }");
puts("void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }");
puts("void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }");
puts("void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }");
{ "vscatterpf1dps", 6, T_EW0 | T_N4, 0xC6, true },
{ "vscatterpf1qps", 6, T_EW0 | T_N4, 0xC7, true },
{ "vscatterpf1dpd", 6, T_EW1 | T_N8, 0xC6, false },
{ "vscatterpf1qpd", 6, T_EW1 | T_N8, 0xC7, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n"
, p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
}
}
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");

View file

@ -630,9 +630,18 @@ void put()
{ "cpuid", 0x0F, 0xA2 },
{ "cwd", 0x66, 0x99 },
{ "cwde", 0x98 },
{ "cmpsb", 0xA6 },
{ "cmpsw", 0x66, 0xA7 },
{ "cmpsd", 0xA7 },
{ "scasb", 0xAE },
{ "scasw", 0x66, 0xAF },
{ "scasd", 0xAF },
{ "movsb", 0xA4 },
{ "movsw", 0x66, 0xA5 },
{ "movsd", 0xA5 },
{ "stosb", 0xAA },
{ "stosw", 0x66, 0xAB },
{ "stosd", 0xAB },
{ "rep", 0xF3 },
{ "lahf", 0x9F },
@ -1233,12 +1242,12 @@ void put()
const char *name;
int type;
} tbl[] = {
{ 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 },
{ 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 },
{ 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
{ 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
{ 0x7F, "movdqa", T_0F | T_66 | T_YMM },
{ 0x7F, "movdqu", T_0F | T_F3 | T_YMM },
{ 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 },
{ 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 },
{ 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
{ 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@ -1643,7 +1652,7 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode);
printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode);
}
}
}
@ -1678,7 +1687,10 @@ void put64()
const GenericTbl tbl[] = {
{ "cdqe", 0x48, 0x98 },
{ "cqo", 0x48, 0x99 },
{ "cmpsq", 0x48, 0xA7 },
{ "movsq", 0x48, 0xA5 },
{ "scasq", 0x48, 0xAF },
{ "stosq", 0x48, 0xAB },
};
putGeneric(tbl, NUM_OF_ARRAY(tbl));

View file

@ -1,5 +1,5 @@
Xbyak 5.601 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
Xbyak 5.67 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
=============
Abstract
@ -327,12 +327,16 @@ License
modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
The files under test/cybozu/ are copied from cybozulib (https://github.com/herumi/cybozulib/),
which is licensed under BSD-3-Clause, and are used only for tests.
The header files under xbyak/ are independent of cybozulib.
History
-------------
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
* 2018/Jul/26 ver 5.661 support mingw64
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix

View file

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.601
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.67
-----------------------------------------------------------------------------
◎概要
@ -335,14 +335,17 @@ http://opensource.org/licenses/BSD-3-Clause
sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
いただきました。
test/cybozu/以下のファイルはcybozulib(https://github.com/herumi/cybozulib/)
の一部を使っています。cybozulibはBSD-3-Clauseライセンスです。
cybozulibは単体テストでのみ利用されていて、xbyak/ディレクトリ以下のヘッダ
ファイルはcybozulibとは独立に利用できます。
-----------------------------------------------------------------------------
◎履歴
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
2018/07/26 ver 5.661 mingw64対応
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加
2018/06/26 ver 5.65 fix push(qword [mem])
2018/03/07 ver 5.64 Cpu()の中でzero divisionが出ることがあるのを修正
2018/02/14 ver 5.63 Cpu::setCacheHierarchy()の修正とclang<3.9のためのEvexModifierZero修正(thanks to mgouicem)
2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加
@ -470,7 +473,3 @@ cybozulibは単体テストでのみ利用されていて、xbyak/ディレク
◎著作権者
光成滋生(MITSUNARI Shigeo, herumi@nifty.com)
---
$Revision: 1.56 $
$Date: 2010/04/16 11:58:22 $

View file

@ -13,7 +13,6 @@ struct Code : Xbyak::CodeGenerator {
{
puts("generate");
printf("ptr=%p, %p\n", getCode(), buf);
Xbyak::CodeArray::protect(buf, sizeof(buf), true);
#ifdef XBYAK32
mov(eax, ptr [esp + 4]);
add(eax, ptr [esp + 8]);
@ -23,6 +22,11 @@ struct Code : Xbyak::CodeGenerator {
lea(rax, ptr [rdi + rsi]);
#endif
ret();
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RE);
}
// On destruction, switch buf back to read/write (PROTECT_RW).
// The result of protect() is not checked here.
~Code()
{
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RW);
}
} s_code;

View file

@ -162,18 +162,21 @@ int main()
{
// use memory allocated by user
using namespace Xbyak;
const size_t codeSize = 1024;
const size_t codeSize = 4096;
uint8 buf[codeSize + 16];
uint8 *p = CodeArray::getAlignedAddress(buf);
CodeArray::protect(p, codeSize, true);
Sample s(p, codeSize);
if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) {
fprintf(stderr, "can't protect\n");
return 1;
}
int (*func)(int) = s.getCode<int (*)(int)>();
if (Xbyak::CastTo<uint8*>(func) != p) {
fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo<uint8*>(func));
return 1;
}
printf("0 + ... + %d = %d\n", 100, func(100));
CodeArray::protect(p, codeSize, false);
CodeArray::protect(p, codeSize, CodeArray::PROTECT_RW);
}
puts("OK");
testReset();

View file

@ -104,6 +104,9 @@ void putCPUinfo()
Core i7-3930K 6 2D
*/
cpu.putFamily();
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
}
}
int main()

View file

@ -37,6 +37,7 @@ test: normalize_prefix jmp bad_address $(TARGET)
$(MAKE) -C ../gen
./test_nm.sh
./test_nm.sh Y
./test_nm.sh avx512
./test_address.sh
./jmp
./bad_address

View file

@ -27,6 +27,8 @@ struct Code : Xbyak::CodeGenerator {
TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]));
TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]));
TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]));
TEST_EXCEPTION(mov(eax, ptr [xmm0]));
TEST_EXCEPTION(fld(dword [xmm0]));
TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3));
TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3));
#ifdef XBYAK64

View file

@ -1,27 +0,0 @@
Copyright (c) 2007-2012 Cybozu Labs, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of the Cybozu Labs, Inc. nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -2,10 +2,11 @@
/**
@file
@brief int type definition and macros
Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
@author MITSUNARI Shigeo(@herumi)
*/
#if defined(_MSC_VER) && (MSC_VER <= 1500)
#if defined(_MSC_VER) && (MSC_VER <= 1500) && !defined(CYBOZU_DEFINED_INTXX)
#define CYBOZU_DEFINED_INTXX
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
typedef unsigned int uint32_t;
@ -38,27 +39,33 @@
#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
#endif
#endif
#ifndef CYBOZU_FORCE_INLINE
#ifdef _MSC_VER
#define CYBOZU_FORCE_INLINE __forceinline
#else
#define CYBOZU_FORCE_INLINE __attribute__((always_inline))
#endif
#endif
#ifndef CYBOZU_UNUSED
#ifdef __GNUC__
#define CYBOZU_UNUSED __attribute__((unused))
#else
#define CYBOZU_UNUSED
#endif
#endif
#ifndef CYBOZU_ALLOCA
#ifdef _MSC_VER
#include <malloc.h>
#define CYBOZU_ALLOCA(x) _malloca(x)
#else
#define CYBOZU_ALLOCA_(x) __builtin_alloca(x)
#endif
#endif
#ifndef CYBOZU_FOREACH
// std::vector<int> v; CYBOZU_FOREACH(auto x, v) {...}
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define CYBOZU_FOREACH(type_x, xs) for each (type_x in xs)
#elif defined(__GNUC__)
#define CYBOZU_FOREACH(type_x, xs) for (type_x : xs)
#define CYBOZU_ALLOCA(x) __builtin_alloca(x)
#endif
#endif
#ifndef CYBOZU_NUM_OF_ARRAY
#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
#endif
#ifndef CYBOZU_SNPRINTF
#ifdef _MSC_VER
#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
#else
#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
@ -68,20 +75,36 @@
#define CYBOZU_CPP_VERSION_CPP03 0
#define CYBOZU_CPP_VERSION_TR1 1
#define CYBOZU_CPP_VERSION_CPP11 2
#define CYBOZU_CPP_VERSION_CPP14 3
#define CYBOZU_CPP_VERSION_CPP17 4
#if (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
#ifdef __GNUC__
#define CYBOZU_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
#else
#define CYBOZU_GNUC_PREREQ(major, minor) 0
#endif
#if (__cplusplus >= 201703)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP17
#elif (__cplusplus >= 201402)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP14
#elif (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
#if defined(_MSC_VER) && (_MSC_VER <= 1600)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
#endif
#elif (__GNUC__ >= 4 && __GNUC_MINOR__ >= 5) || (__clang_major__ >= 3)
#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
#endif
#if (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1)
#ifdef CYBOZU_USE_BOOST
#define CYBOZU_NAMESPACE_STD boost
#define CYBOZU_NAMESPACE_TR1_BEGIN
#define CYBOZU_NAMESPACE_TR1_END
#elif (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1) && !defined(__APPLE__)
#define CYBOZU_NAMESPACE_STD std::tr1
#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
#define CYBOZU_NAMESPACE_TR1_END }
@ -92,25 +115,44 @@
#endif
#ifndef CYBOZU_OS_BIT
#if defined(_WIN64) || defined(__x86_64__)
#if defined(_WIN64) || defined(__x86_64__) || defined(__AARCH64EL__) || defined(__EMSCRIPTEN__)
#define CYBOZU_OS_BIT 64
#else
#define CYBOZU_OS_BIT 32
#endif
#endif
#ifndef CYBOZU_HOST
#define CYBOZU_HOST_UNKNOWN 0
#define CYBOZU_HOST_INTEL 1
#define CYBOZU_HOST_ARM 2
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
#define CYBOZU_HOST CYBOZU_HOST_INTEL
#elif defined(__arm__) || defined(__AARCH64EL__)
#define CYBOZU_HOST CYBOZU_HOST_ARM
#else
#define CYBOZU_HOST CYBOZU_HOST_UNKNOWN
#endif
#endif
#ifndef CYBOZU_ENDIAN
#define CYBOZU_ENDIAN_UNKNOWN 0
#define CYBOZU_ENDIAN_LITTLE 1
#define CYBOZU_ENDIAN_BIG 2
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
#if (CYBOZU_HOST == CYBOZU_HOST_INTEL)
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
#elif (CYBOZU_HOST == CYBOZU_HOST_ARM) && (defined(__ARM_EABI__) || defined(__AARCH64EL__))
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
#else
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
#endif
#endif
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
#define CYBOZU_NOEXCEPT noexcept
#else
#define CYBOZU_NOEXCEPT throw()
#endif
namespace cybozu {
// No-op helper: takes any value by const reference and does nothing with it.
// NOTE(review): judging by the name, it exists to silence unused-variable
// warnings at call sites -- confirm against callers.
template<class T>
void disable_warning_unused_variable(const T&) { }

View file

@ -3,7 +3,7 @@
@file
@brief unit test class
Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
@author MITSUNARI Shigeo(@herumi)
*/
#include <stdio.h>

View file

@ -1143,10 +1143,11 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
ret();
}
} code;
Xbyak::CodeArray::protect(p, 4096, true);
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RE);
code.getCode<void (*)()>()();
CYBOZU_TEST_EQUAL(*x0, 123);
CYBOZU_TEST_EQUAL(*x1, 456);
CYBOZU_TEST_EQUAL(buf[8], 99);
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RW);
}
#endif

View file

@ -840,9 +840,9 @@ public:
put(p.name, _YMM|YMM_KZ, _YMM|MEM);
put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
if (!p.M_X) continue;
put(p.name, MEM, _XMM);
put(p.name, MEM, _YMM);
put(p.name, MEM, _ZMM);
put(p.name, MEM|MEM_K, _XMM);
put(p.name, MEM|MEM_K, _YMM);
put(p.name, MEM|MEM_K, _ZMM);
}
put("vsqrtpd", XMM_KZ, M_1to2 | _MEM);
put("vsqrtpd", YMM_KZ, M_1to4 | _MEM);

View file

@ -1,4 +1,5 @@
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_bin2hex.h"
#include <stdlib.h>
@ -121,6 +122,15 @@ class Test {
void operator=(const Test&);
const bool isXbyak_;
int funcNum_;
/*
	strip one trailing underscore from a mnemonic when not generating Xbyak code
	and_, or_, xor_, not_ => and, or, xor, not
	@param s [in] mnemonic name (taken by value; the copy is edited and returned)
	@return s without its final '_' ; s unchanged when isXbyak_ is set,
	        when s is empty, or when s does not end with '_'
*/
std::string removeUnderScore(std::string s) const
{
	// check for empty first: s[s.size() - 1] on an empty string indexes out of range
	if (!isXbyak_ && !s.empty() && s[s.size() - 1] == '_') s.resize(s.size() - 1);
	return s;
}
// check all op1, op2, op3
void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
{
@ -448,6 +458,10 @@ class Test {
#ifdef XBYAK64
"cdqe",
"cqo",
"cmpsq",
"movsq",
"scasq",
"stosq",
#else
"aaa",
"aad",
@ -476,6 +490,18 @@ class Test {
"lahf",
// "lock",
"cmpsb",
"cmpsw",
"cmpsd",
"movsb",
"movsw",
"movsd",
"scasb",
"scasw",
"scasd",
"stosb",
"stosw",
"stosd",
"nop",
"sahf",
@ -951,15 +977,16 @@ class Test {
static const char tbl[][16] = {
"adc",
"add",
"and",
"and_",
"cmp",
"or",
"or_",
"sbb",
"sub",
"xor",
"xor_",
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const char *p = tbl[i];
const std::string s = removeUnderScore(tbl[i]);
const char *p = s.c_str();
put(p, REG32, REG32|MEM);
put(p, REG64, REG64|MEM);
put(p, REG16, REG16|MEM);
@ -1017,10 +1044,11 @@ class Test {
"imul",
"mul",
"neg",
"not",
"not_",
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const char *p = tbl[i];
const std::string s = removeUnderScore(tbl[i]);
const char *p = s.c_str();
put(p, REG32e|REG16|REG8|REG8_3);
put(p, MEM32|MEM16|MEM8);
}
@ -1042,15 +1070,19 @@ class Test {
push word 2
reduce 2-byte stack, so I can't support it
*/
const char *p = "push";
put(p, REG16);
put(p, IMM8); // IMM16 decrease -2 from esp
put(p, MEM16);
put("push", IMM8|IMM32);
if (isXbyak_) {
puts("push(word, 1000);dump();");
} else {
puts("push word 1000");
}
put("push", REG16|MEM16);
put("pop", REG16|MEM16);
#ifdef XBYAK64
put("push", REG64);
put("pop", REG64);
put("push", REG64|IMM32|MEM64);
put("pop", REG64|MEM64);
#else
put("push", REG32|IMM32|MEM32);
put("pop", REG32|MEM32);
@ -2672,7 +2704,7 @@ public:
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const char *name = tbl[i];
put(name, MEM, ZMM);
put(name, MEM|MEM_K, ZMM|XMM|YMM);
put(name, ZMM, MEM);
}
}

View file

@ -1,4 +1,5 @@
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#define XBYAK_ENABLE_OMITTED_OPERAND
#include "xbyak/xbyak.h"

View file

@ -31,7 +31,7 @@ address %1% jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame > x.lst
diff x.lst ok.lst
diff -w x.lst ok.lst
wc x.lst
:end

View file

@ -38,5 +38,5 @@ make_nm jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
diff x.lst ok.lst
diff -w x.lst ok.lst
wc x.lst

View file

@ -27,5 +27,5 @@ awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti
make_512 jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% /DXBYAK_AVX512
nm_frame |%FILTER% > x.lst
diff x.lst ok.lst
diff -w x.lst ok.lst
wc x.lst

View file

@ -39,5 +39,5 @@ if /i "%Y%"=="1" (
make_nm jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
diff x.lst ok.lst
diff -w x.lst ok.lst
wc x.lst

View file

@ -19,6 +19,12 @@ else if ($1 == "Y64") then
set OPT2="-DUSE_YASM -DXBYAK64"
set OPT3=win64
set FILTER=./normalize_prefix
else if ($1 == "avx512") then
echo "nasm(64bit) + avx512"
set EXE=nasm
set OPT2="-DXBYAK64 -DUSE_AVX512"
set OPT3=win64
set FILTER=./normalize_prefix
else
echo "nasm(32bit)"
set EXE=nasm

View file

@ -105,7 +105,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x5601 /* 0xABCD = A.BC(D) */
VERSION = 0x5670 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -566,7 +566,7 @@ struct EvexModifierRounding {
explicit EvexModifierRounding(int rounding) : rounding(rounding) {}
int rounding;
};
struct EvexModifierZero{};
struct EvexModifierZero{EvexModifierZero() {}};
struct Xmm : public Mmx {
explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
@ -614,16 +614,16 @@ struct Reg64 : public Reg32e {
};
struct RegRip {
sint64 disp_;
Label* label_;
const Label* label_;
bool isAddr_;
explicit RegRip(sint64 disp = 0, Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
friend const RegRip operator+(const RegRip& r, sint64 disp) {
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
}
friend const RegRip operator-(const RegRip& r, sint64 disp) {
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
}
friend const RegRip operator+(const RegRip& r, Label& label) {
friend const RegRip operator+(const RegRip& r, const Label& label) {
if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING);
return RegRip(r.disp_, &label);
}
@ -848,10 +848,15 @@ protected:
uint64 disp = i->getVal(top_);
rewrite(i->codeOffset, disp, i->jmpSize);
}
if (alloc_->useProtect() && !protect(top_, size_, true)) throw Error(ERR_CANT_PROTECT);
if (alloc_->useProtect() && !protect(top_, size_, PROTECT_RWE)) throw Error(ERR_CANT_PROTECT);
isCalledCalcJmpAddress_ = true;
}
public:
enum ProtectMode {
PROTECT_RW = 0, // read/write
PROTECT_RWE = 1, // read/write/exec
PROTECT_RE = 2 // read/exec
};
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
@ -861,7 +866,7 @@ public:
, isCalledCalcJmpAddress_(false)
{
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) {
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, PROTECT_RWE)) {
alloc_->free(top_);
throw Error(ERR_CANT_PROTECT);
}
@ -869,7 +874,7 @@ public:
virtual ~CodeArray()
{
if (isAllocType()) {
if (alloc_->useProtect()) protect(top_, maxSize_, false);
if (alloc_->useProtect()) protect(top_, maxSize_, PROTECT_RW);
alloc_->free(top_);
}
}
@ -960,19 +965,36 @@ public:
change exec permission of memory
@param addr [in] buffer address
@param size [in] buffer size
@param canExec [in] true(enable to exec), false(disable to exec)
@param protectMode [in] mode(RW/RWE/RE)
@return true(success), false(failure)
*/
static inline bool protect(const void *addr, size_t size, bool canExec)
static inline bool protect(const void *addr, size_t size, int protectMode)
{
#if defined(_WIN32)
const DWORD c_rw = PAGE_READWRITE;
const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
const DWORD c_re = PAGE_EXECUTE_READ;
DWORD mode;
#else
const int c_rw = PROT_READ | PROT_WRITE;
const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
const int c_re = PROT_READ | PROT_EXEC;
int mode;
#endif
switch (protectMode) {
case PROTECT_RW: mode = c_rw; break;
case PROTECT_RWE: mode = c_rwe; break;
case PROTECT_RE: mode = c_re; break;
default:
return false;
}
#if defined(_WIN32)
DWORD oldProtect;
return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
#elif defined(__GNUC__)
size_t pageSize = sysconf(_SC_PAGESIZE);
size_t iaddr = reinterpret_cast<size_t>(addr);
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0);
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
#else
return true;
@ -999,46 +1021,43 @@ public:
M_ripAddr
};
Address(uint32 sizeBit, bool broadcast, const RegExp& e)
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(broadcast)
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
{
e_.verify();
}
#ifdef XBYAK64
explicit Address(size_t disp)
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false){ }
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), permitVsib_(false), broadcast_(broadcast) { }
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
#endif
void permitVsib() const { permitVsib_ = true; }
RegExp getRegExp(bool optimize = true) const
{
return optimize ? e_.optimize() : e_;
}
Mode getMode() const { return mode_; }
bool is32bit() const { verify(); return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
bool isOnlyDisp() const { verify(); return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
size_t getDisp() const { verify(); return e_.getDisp(); }
bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
size_t getDisp() const { return e_.getDisp(); }
uint8 getRex() const
{
verify();
if (mode_ != M_ModRM) return 0;
return getRegExp().getRex();
}
bool is64bitDisp() const { verify(); return mode_ == M_64bitDisp; } // for moffset
bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
bool isBroadcast() const { return broadcast_; }
const Label* getLabel() const { return label_; }
bool operator==(const Address& rhs) const
{
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && permitVsib_ == rhs.permitVsib_ && broadcast_ == rhs.broadcast_;
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
}
bool operator!=(const Address& rhs) const { return !operator==(rhs); }
bool isVsib() const { return e_.isVsib(); }
private:
RegExp e_;
const Label* label_;
Mode mode_;
mutable bool permitVsib_;
bool broadcast_;
void verify() const { if (e_.isVsib() && !permitVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); }
};
inline const Address& Operand::getAddress() const
@ -1443,6 +1462,7 @@ private:
T_B32 = 1 << 26, // m32bcst
T_B64 = 1 << 27, // m64bcst
T_M_K = 1 << 28, // mem{k}
T_VSIB = 1 << 29,
T_XXX
};
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
@ -1669,8 +1689,9 @@ private:
// reg is reg field of ModRM
// immSize is the size for immediate value
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0)
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
{
if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
if (addr.getMode() == Address::M_ModRM) {
setSIB(addr.getRegExp(), reg, disp8N);
} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
@ -1812,15 +1833,20 @@ private:
}
/*
	Encodes push/pop for a register or memory operand.
	op   : operand; must be 16 bits wide or BIT (the native width) wide
	code : opcode byte passed to opModM() for the memory form
	ext  : register index given to the Reg passed to opModM() for the memory form
	alt  : base opcode of the register form; the low 3 bits of the register index are OR-ed in
	Throws ERR_BAD_COMBINATION for any other operand size or operand kind.
*/
void opPushPop(const Operand& op, int code, int ext, int alt)
{
	const int bit = op.getBit();
	if (bit == 16 || bit == BIT) {
		// the 0x66 prefix is emitted exactly once, for both the register and the memory form
		if (bit == 16) db(0x66);
		if (op.isREG()) {
			// registers with index >= 8 need the 0x41 prefix byte
			if (op.getReg().getIdx() >= 8) db(0x41);
			db(alt | (op.getIdx() & 7));
			return;
		}
		if (op.isMEM()) {
			// the Reg is always built 32 bits wide, independent of the operand size
			// NOTE(review): presumably so that opModM() adds no REX.W for qword[mem] -- confirm
			opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
			return;
		}
	}
	// wrong size, or neither a register nor a memory operand
	throw Error(ERR_BAD_COMBINATION);
}
void verifyMemHasSize(const Operand& op) const
{
@ -1925,7 +1951,7 @@ private:
} else {
vex(r, base, p1, type, code, x);
}
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N);
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
} else {
const Reg& base = op2.getReg();
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
@ -2026,8 +2052,7 @@ private:
}
if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING);
}
addr.permitVsib();
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type | T_YMM, code);
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
}
enum {
xx_yy_zz = 0,
@ -2051,7 +2076,6 @@ private:
{
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
checkGather2(x, addr.getRegExp().getIndex(), mode);
addr.permitVsib();
opVex(x, 0, addr, type, code);
}
/*
@ -2071,7 +2095,6 @@ private:
{
if (addr.hasZero()) throw Error(ERR_INVALID_ZERO);
if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
addr.permitVsib();
opVex(x, 0, addr, type, code);
}
public:

View file

@ -1,4 +1,4 @@
const char *getVersionString() const { return "5.601"; }
const char *getVersionString() const { return "5.67"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@ -122,8 +122,11 @@ void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); }
void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
// Operand-less cmpsb / cmpsd: each emits its single fixed opcode byte via db().
void cmpsb() { db(0xA6); }
void cmpsd() { db(0xA7); }
void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
// Operand-less cmpsw: the 0x66 prefix byte followed by the same opcode byte (0xA7) that cmpsd() emits.
void cmpsw() { db(0x66); db(0xA7); }
void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); }
@ -683,6 +686,9 @@ void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
// scasb / scasd / scasw: fixed-byte encodings emitted via db().
// scasw is the scasd opcode (0xAF) preceded by the 0x66 prefix byte.
void scasb() { db(0xAE); }
void scasd() { db(0xAF); }
void scasw() { db(0x66); db(0xAF); }
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
@ -742,6 +748,9 @@ void stc() { db(0xF9); }
void std() { db(0xFD); }
void sti() { db(0xFB); }
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
// stosb / stosd / stosw: fixed-byte encodings emitted via db().
// stosw is the stosd opcode (0xAB) preceded by the 0x66 prefix byte.
void stosb() { db(0xAA); }
void stosd() { db(0xAB); }
void stosw() { db(0x66); db(0xAB); }
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
@ -1001,10 +1010,10 @@ void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_X
void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE); }
void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF); }
void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF); }
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x92, 0); }
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x92, 1); }
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x93, 1); }
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); }
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); }
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); }
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); }
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); }
void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
@ -1030,9 +1039,9 @@ void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5D); }
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5D); }
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x29); }
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x29); }
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x29); }
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); }
void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); }
void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }
void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }
@ -1068,9 +1077,9 @@ void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 |
void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_M_K, 0x11); }
void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x11); }
void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x11); }
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); }
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x11); }
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x11); }
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
@ -1135,10 +1144,10 @@ void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x90, 1); }
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x90, 0); }
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x91, 2); }
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x91, 1); }
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); }
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); }
void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); }
void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); }
void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); }
@ -1544,7 +1553,10 @@ void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
// Fixed two-byte encodings: each emits the 0x48 prefix byte (REX.W) followed by a one-byte opcode.
// cmpsq / movsq / scasq / stosq are the quadword forms of the string instructions.
void cdqe() { db(0x48); db(0x98); }
void cqo() { db(0x48); db(0x99); }
void cmpsq() { db(0x48); db(0xA7); }
void movsq() { db(0x48); db(0xA5); }
void scasq() { db(0x48); db(0xAF); }
void stosq() { db(0x48); db(0xAB); }
void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
@ -1717,18 +1729,18 @@ void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(1
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x92, 1); }
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x92, 0); }
void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x93, 0); }
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x93, 2); }
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
@ -1745,17 +1757,17 @@ void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {i
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
@ -1815,10 +1827,10 @@ void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T
void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x90, 0); }
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x90, 1); }
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x91, 2); }
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x91, 0); }
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 0); }
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 1); }
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 2); }
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
@ -1869,10 +1881,10 @@ void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.get
void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 0); }
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 1); }
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 2); }
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 0); }
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); }
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); }
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); }
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); }
void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
@ -1936,18 +1948,18 @@ void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 1); }
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 0); }
void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 0); }
void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 2); }
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); }
void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); }
void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }

View file

@ -84,6 +84,67 @@ class Cpu {
displayModel = model;
}
}
/*
	Returns the bit field val[end:base], right-aligned.
	base : lowest bit position of the field (0..31)
	end  : highest bit position of the field, INCLUSIVE (base..31)
	The callers pass CPUID field ranges such as (22, 31) or (0, 11), i.e. both
	ends are part of the field, so the field is end - base + 1 bits wide.
*/
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
{
	const unsigned int width = end - base + 1;
	// a field covering all 32 bits cannot be masked with (1u << 32) - 1:
	// shifting by the full type width is undefined behavior
	if (width >= 32) return val >> base;
	return (val >> base) & ((1u << width) - 1);
}
/*
	Fills data_cache_size[], cores_sharing_data_cache[] and data_cache_levels
	from CPUID leaf 4, using leaf 0xB for the core counts when it is available.
	Does nothing unless the tINTEL bit is set in type_.
*/
void setCacheHierarchy()
{
	if (!(type_ & tINTEL)) return;

	// cache type codes found in the low bits of EAX of CPUID leaf 4
	const unsigned int NO_CACHE = 0;
	const unsigned int DATA_CACHE = 1;
//	const unsigned int INSTRUCTION_CACHE = 2;
	const unsigned int UNIFIED_CACHE = 3;

	unsigned int regs[4]; // { eax, ebx, ecx, edx }
	unsigned int smtWidth = 0;
	unsigned int coresOnSocket = 0;

	/*
		When leaf 11 exists it provides the number of SMT cores and the number
		of cores on the socket. If x2APIC is supported, these are the only
		correct numbers.
		Leaf 0xB can be zeroed-out by a hypervisor, in which case both
		values stay 0.
	*/
	getCpuidEx(0x0, 0, regs);
	const bool hasLeafB = regs[0] >= 0xB;
	if (hasLeafB) {
		getCpuidEx(0xB, 0, regs); // SMT level
		smtWidth = regs[1] & 0x7FFF;
		getCpuidEx(0xB, 1, regs); // core level
		coresOnSocket = regs[1] & 0x7FFF;
	}

	/*
		Assumptions:
		- the first level of data cache is not shared (true for every existing
		  architecture); it is used to determine the SMT width on
		  architectures that do not support leaf 11.
		- when leaf 4 reports fewer cores than the number of cores on the
		  socket reported by leaf 11, it is an exact count, not an upper bound.
	*/
	int subLeaf = 0;
	while (data_cache_levels < maxNumberCacheLevels) {
		getCpuidEx(0x4, subLeaf++, regs);
		const unsigned int cacheType = extractBit(regs[0], 0, 4);
		if (cacheType == NO_CACHE) break;
		// instruction caches are skipped without consuming a data cache level
		if (cacheType != DATA_CACHE && cacheType != UNIFIED_CACHE) continue;

		unsigned int sharingCores = extractBit(regs[0], 14, 25) + 1;
		// clamp to the leaf 0xB count; only possible when leaf 0xB is supported and valid
		if (coresOnSocket != 0 && coresOnSocket < sharingCores) {
			sharingCores = coresOnSocket;
		}
		assert(sharingCores != 0);

		// cache size is the product of four "value minus one" fields:
		// three taken from EBX and the whole of ECX
		const unsigned int ebxHigh = extractBit(regs[1], 22, 31) + 1;
		const unsigned int ebxMid = extractBit(regs[1], 12, 21) + 1;
		const unsigned int ebxLow = extractBit(regs[1], 0, 11) + 1;
		const unsigned int ecxPlusOne = regs[2] + 1;
		data_cache_size[data_cache_levels] = ebxHigh * ebxMid * ebxLow * ecxPlusOne;

		// without leaf 0xB, the first data cache defines the SMT width
		if (smtWidth == 0 && cacheType == DATA_CACHE) smtWidth = sharingCores;
		assert(smtWidth != 0);
		// never report 0 cores sharing a cache
		cores_sharing_data_cache[data_cache_levels] = (std::max)(sharingCores / smtWidth, 1u);
		++data_cache_levels;
	}
}
public:
int model;
int family;
@ -92,6 +153,25 @@ public:
int extFamily;
int displayFamily; // family + extFamily
int displayModel; // model + extModel
// TODO: consider moving these members into the private section
static const unsigned int maxNumberCacheLevels = 10;
unsigned int data_cache_size[maxNumberCacheLevels];
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
unsigned int data_cache_levels;
// Number of cache levels recorded in data_cache_size[] / cores_sharing_data_cache[].
unsigned int getDataCacheLevels() const
{
	return data_cache_levels;
}
unsigned int getCoresSharingDataCache(unsigned int i) const
{
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
return cores_sharing_data_cache[i];
}
unsigned int getDataCacheSize(unsigned int i) const
{
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
return data_cache_size[i];
}
/*
data[] = { eax, ebx, ecx, edx }
*/
@ -124,6 +204,7 @@ public:
#endif
}
typedef uint64 Type;
static const Type NONE = 0;
static const Type tMMX = 1 << 0;
static const Type tMMX2 = 1 << 1;
@ -190,6 +271,7 @@ public:
Cpu()
: type_(NONE)
, data_cache_levels(0)
{
unsigned int data[4];
const unsigned int& EAX = data[0];
@ -281,6 +363,7 @@ public:
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
}
setFamily();
setCacheHierarchy();
}
void putFamily() const
{