externals: Update xbyak to v6.68
Merge commit 'f6fdb5f55a88f73ef7bef45f50cf5878ceec9781'
This commit is contained in:
commit
916d7cf9bd
30 changed files with 589 additions and 155 deletions
18
externals/xbyak/.github/workflows/main.yml
vendored
18
externals/xbyak/.github/workflows/main.yml
vendored
|
@ -1,13 +1,21 @@
|
||||||
name: test
|
name: test
|
||||||
on: [push]
|
on: [push]
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: sh
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
test:
|
||||||
name: test
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
container:
|
||||||
|
image: debian:testing
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v3
|
||||||
- run: sudo apt update
|
- run: apt -y update
|
||||||
- run: sudo apt install nasm yasm g++-multilib tcsh
|
- run: apt -y install g++-multilib libboost-dev make nasm yasm
|
||||||
- run: make test
|
- run: make test
|
||||||
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
|
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
|
||||||
|
|
8
externals/xbyak/Android.bp
vendored
Normal file
8
externals/xbyak/Android.bp
vendored
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
//#################################################
|
||||||
|
cc_library_headers {
|
||||||
|
name: "xbyak_headers",
|
||||||
|
vendor: true,
|
||||||
|
export_include_dirs: [
|
||||||
|
"xbyak"
|
||||||
|
],
|
||||||
|
}
|
2
externals/xbyak/CMakeLists.txt
vendored
2
externals/xbyak/CMakeLists.txt
vendored
|
@ -1,6 +1,6 @@
|
||||||
cmake_minimum_required(VERSION 2.6...3.0.2)
|
cmake_minimum_required(VERSION 2.6...3.0.2)
|
||||||
|
|
||||||
project(xbyak LANGUAGES CXX VERSION 6.61)
|
project(xbyak LANGUAGES CXX VERSION 6.68)
|
||||||
|
|
||||||
file(GLOB headers xbyak/*.h)
|
file(GLOB headers xbyak/*.h)
|
||||||
|
|
||||||
|
|
9
externals/xbyak/doc/changelog.md
vendored
9
externals/xbyak/doc/changelog.md
vendored
|
@ -1,5 +1,14 @@
|
||||||
# History
|
# History
|
||||||
|
|
||||||
|
* 2022/Dec/07 ver 6.68 support prefetchit{0,1}
|
||||||
|
* 2022/Nov/30 ver 6.67 support CMPccXADD
|
||||||
|
* 2022/Nov/25 ver 6.66 support RAO-INT
|
||||||
|
* 2022/Nov/22 ver 6.65 consider x32
|
||||||
|
* 2022/Nov/04 ver 6.64 some vmov* support addressing with mask
|
||||||
|
* 2022/Oct/06 ver 6.63 vpmadd52{h,l}uq support AVX-IFMA
|
||||||
|
* 2022/Oct/05 ver 6.63 support amx_fp16/avx_vnni_int8/avx_ne_convert and add setDefaultEncoding()
|
||||||
|
* 2022/Aug/15 ver 6.62 add serialize instruction
|
||||||
|
* 2022/Aug/02 ver 6.61.1 noexcept is supported by Visual Studio 2015 or later
|
||||||
* 2022/Jul/29 ver 6.61 fix exception of movzx eax, ah in 64-bit mode
|
* 2022/Jul/29 ver 6.61 fix exception of movzx eax, ah in 64-bit mode
|
||||||
* 2022/Jun/16 ver 6.60.2 fix detection of GFNI, VAES, and VPCLMULQDQ
|
* 2022/Jun/16 ver 6.60.2 fix detection of GFNI, VAES, and VPCLMULQDQ
|
||||||
* 2022/Jun/15 ver 6.60.1 fix link error of Xbyak::util::Cpu on Visual Studio with /O0 option
|
* 2022/Jun/15 ver 6.60.1 fix link error of Xbyak::util::Cpu on Visual Studio with /O0 option
|
||||||
|
|
12
externals/xbyak/doc/install.md
vendored
12
externals/xbyak/doc/install.md
vendored
|
@ -12,3 +12,15 @@ make install
|
||||||
```
|
```
|
||||||
|
|
||||||
These files are copied into `/usr/local/include/xbyak`.
|
These files are copied into `/usr/local/include/xbyak`.
|
||||||
|
|
||||||
|
# Building xbyak - Using vcpkg
|
||||||
|
|
||||||
|
You can download and install xbyak using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
|
||||||
|
|
||||||
|
git clone https://github.com/Microsoft/vcpkg.git
|
||||||
|
cd vcpkg
|
||||||
|
./bootstrap-vcpkg.sh
|
||||||
|
./vcpkg integrate install
|
||||||
|
./vcpkg install xbyak
|
||||||
|
|
||||||
|
The xbyak port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
|
||||||
|
|
8
externals/xbyak/doc/usage.md
vendored
8
externals/xbyak/doc/usage.md
vendored
|
@ -110,7 +110,15 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64],
|
||||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||||
|
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- setDefaultEncoding(PreferredEncoding encoding);
|
||||||
|
- Set the default encoding to select EVEX or VEX.
|
||||||
|
- The default value is EvexEncoding.
|
||||||
|
- This function affects only an instruction that has a PreferredEncoding argument such as vpdpbusd.
|
||||||
|
|
||||||
### Remark
|
### Remark
|
||||||
* `k1`, ..., `k7` are opmask registers.
|
* `k1`, ..., `k7` are opmask registers.
|
||||||
- `k0` is dealt as no mask.
|
- `k0` is dealt as no mask.
|
||||||
|
|
2
externals/xbyak/gen/Makefile
vendored
2
externals/xbyak/gen/Makefile
vendored
|
@ -1,6 +1,6 @@
|
||||||
TARGET=../xbyak/xbyak_mnemonic.h
|
TARGET=../xbyak/xbyak_mnemonic.h
|
||||||
BIN=sortline gen_code gen_avx512
|
BIN=sortline gen_code gen_avx512
|
||||||
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
CFLAGS=-I../ -O2 -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||||
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
|
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
|
||||||
sortline: sortline.cpp
|
sortline: sortline.cpp
|
||||||
$(CXX) $(CFLAGS) $< -o $@
|
$(CXX) $(CFLAGS) $< -o $@
|
||||||
|
|
40
externals/xbyak/gen/gen_avx512.cpp
vendored
40
externals/xbyak/gen/gen_avx512.cpp
vendored
|
@ -387,9 +387,6 @@ void putX_X_XM_IMM()
|
||||||
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||||
{ 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
{ 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||||
|
|
||||||
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
|
||||||
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
|
||||||
|
|
||||||
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||||
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||||
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||||
|
@ -695,29 +692,29 @@ void putMov()
|
||||||
int type;
|
int type;
|
||||||
int mode;
|
int mode;
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||||
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||||
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||||
|
|
||||||
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
|
|
||||||
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
|
|
||||||
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
|
|
||||||
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
|
|
||||||
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
|
@ -827,7 +824,6 @@ void putMisc()
|
||||||
puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }");
|
puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }");
|
||||||
|
|
||||||
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
||||||
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }");
|
|
||||||
|
|
||||||
puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }");
|
puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }");
|
||||||
puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }");
|
puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }");
|
||||||
|
|
92
externals/xbyak/gen/gen_code.cpp
vendored
92
externals/xbyak/gen/gen_code.cpp
vendored
|
@ -560,6 +560,8 @@ void put()
|
||||||
{ 0, "nta", 0x18},
|
{ 0, "nta", 0x18},
|
||||||
{ 2, "wt1", 0x0D},
|
{ 2, "wt1", 0x0D},
|
||||||
{ 1, "w", 0x0D},
|
{ 1, "w", 0x0D},
|
||||||
|
{ 7, "it0", 0x18},
|
||||||
|
{ 6, "it1", 0x18},
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -693,6 +695,7 @@ void put()
|
||||||
{ "lock", 0xF0 },
|
{ "lock", 0xF0 },
|
||||||
|
|
||||||
{ "sahf", 0x9E },
|
{ "sahf", 0x9E },
|
||||||
|
{ "serialize", 0x0F, 0x01, 0xE8 },
|
||||||
{ "stc", 0xF9 },
|
{ "stc", 0xF9 },
|
||||||
{ "std", 0xFD },
|
{ "std", 0xFD },
|
||||||
{ "sti", 0xFB },
|
{ "sti", 0xFB },
|
||||||
|
@ -806,6 +809,23 @@ void put()
|
||||||
printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
|
printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
uint8_t prefix;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "aadd", 0 },
|
||||||
|
{ "aand", 0x66 },
|
||||||
|
{ "aor", 0xF2 },
|
||||||
|
{ "axor", 0xF3 },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl *p = &tbl[i];
|
||||||
|
printf("void %s(const Address& addr, const Reg32e &reg) { ", p->name);
|
||||||
|
if (p->prefix) printf("db(0x%02X); ", p->prefix);
|
||||||
|
printf("opModM(addr, reg, 0x0F, 0x38, 0x0FC); }\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const struct Tbl {
|
const struct Tbl {
|
||||||
|
@ -1666,6 +1686,25 @@ void put()
|
||||||
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
|
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
int type;
|
||||||
|
uint8_t code;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "vbcstnebf162ps", T_F3 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
|
||||||
|
{ "vbcstnesh2ps", T_66 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
|
||||||
|
{ "vcvtneebf162ps", T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||||
|
{ "vcvtneeph2ps", T_66 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||||
|
{ "vcvtneobf162ps", T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||||
|
{ "vcvtneoph2ps", T_0F38 | T_W0 | T_YMM, 0xB0 }
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl& p = tbl[i];
|
||||||
|
printf("void %s(const Xmm& x, const Address& addr) { opVex(x, 0, addr, %s, 0x%02X); }\n", p.name, type2String(p.type).c_str(), p.code);
|
||||||
|
}
|
||||||
|
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }");
|
||||||
|
}
|
||||||
// haswell gpr(reg, reg, r/m)
|
// haswell gpr(reg, reg, r/m)
|
||||||
{
|
{
|
||||||
const struct Tbl {
|
const struct Tbl {
|
||||||
|
@ -1755,11 +1794,33 @@ void put()
|
||||||
{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||||
{ 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
{ 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||||
{ 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
{ 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||||
|
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
|
||||||
|
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
std::string type = type2String(p->type);
|
std::string type = type2String(p->type);
|
||||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code);
|
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// avx-vnni-int8
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
uint8_t code;
|
||||||
|
const char *name;
|
||||||
|
int type;
|
||||||
|
} tbl[] = {
|
||||||
|
{ 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl *p = &tbl[i];
|
||||||
|
std::string type = type2String(p->type);
|
||||||
|
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1824,6 +1885,34 @@ void put64()
|
||||||
|
|
||||||
puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
|
puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
|
||||||
puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
|
puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
|
||||||
|
// CMPccXADD
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
uint8_t code;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "be", 0xE6 },
|
||||||
|
{ "b", 0xE2 },
|
||||||
|
{ "le", 0xEE },
|
||||||
|
{ "l", 0xEC },
|
||||||
|
{ "nbe", 0xE7 },
|
||||||
|
{ "nb", 0xE3 },
|
||||||
|
{ "nle", 0xEF },
|
||||||
|
{ "nl", 0xED },
|
||||||
|
{ "no", 0xE1 },
|
||||||
|
{ "np", 0xEB },
|
||||||
|
{ "ns", 0xE9 },
|
||||||
|
{ "nz", 0xE5 },
|
||||||
|
{ "o", 0xE0 },
|
||||||
|
{ "p", 0xEA },
|
||||||
|
{ "s", 0xE8 },
|
||||||
|
{ "z", 0xE4 },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl *p = &tbl[i];
|
||||||
|
printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0x%02X, false); }\n", p->name, p->code);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void putAMX_TILE()
|
void putAMX_TILE()
|
||||||
|
@ -1842,6 +1931,7 @@ void putAMX_INT8()
|
||||||
puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
|
||||||
puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
|
||||||
puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }");
|
||||||
|
puts("void tdpfp16ps(const Tmm &x1, const Tmm &x2, const Tmm &x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); }");
|
||||||
}
|
}
|
||||||
void putAMX_BF16()
|
void putAMX_BF16()
|
||||||
{
|
{
|
||||||
|
|
2
externals/xbyak/meson.build
vendored
2
externals/xbyak/meson.build
vendored
|
@ -5,7 +5,7 @@
|
||||||
project(
|
project(
|
||||||
'xbyak',
|
'xbyak',
|
||||||
'cpp',
|
'cpp',
|
||||||
version: '6.61',
|
version: '6.68',
|
||||||
license: 'BSD-3-Clause',
|
license: 'BSD-3-Clause',
|
||||||
default_options: 'b_ndebug=if-release'
|
default_options: 'b_ndebug=if-release'
|
||||||
)
|
)
|
||||||
|
|
3
externals/xbyak/readme.md
vendored
3
externals/xbyak/readme.md
vendored
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
# Xbyak 6.61 [![Badge Build]][Build Status]
|
# Xbyak 6.68 [![Badge Build]][Build Status]
|
||||||
|
|
||||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||||
|
|
||||||
### News
|
### News
|
||||||
|
|
||||||
|
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
||||||
- add movdiri, movdir64b, clwb, cldemote
|
- add movdiri, movdir64b, clwb, cldemote
|
||||||
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
||||||
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
||||||
|
|
15
externals/xbyak/readme.txt
vendored
15
externals/xbyak/readme.txt
vendored
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.61
|
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.68
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎概要
|
◎概要
|
||||||
|
@ -166,13 +166,15 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64],
|
||||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||||
|
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
||||||
注意
|
注意
|
||||||
* k1, ..., k7 は新しいopmaskレジスタです。
|
* k1, ..., k7 は新しいopmaskレジスタです。
|
||||||
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
|
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
|
||||||
* `k4 | k3`と`k3 | k4`は意味が異なります。
|
* `k4 | k3`と`k3 | k4`は意味が異なります。
|
||||||
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
|
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
|
||||||
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
|
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
|
||||||
|
* setDefaultEncoding()でencoding省略時のEVEX/VEXを設定できます。
|
||||||
|
|
||||||
・ラベル
|
・ラベル
|
||||||
|
|
||||||
|
@ -400,6 +402,15 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎履歴
|
◎履歴
|
||||||
|
|
||||||
|
2022/12/07 ver 6.68 prefetchit{0,1}サポート
|
||||||
|
2022/11/30 ver 6.67 CMPccXADDサポート
|
||||||
|
2022/11/25 ver 6.66 RAO-INTサポート
|
||||||
|
2022/11/22 ver 6.65 x32動作確認
|
||||||
|
2022/11/04 ver 6.64 vmov*命令をmaskつきアドレッシング対応修正
|
||||||
|
2022/10/06 ver 6.63 AVX-IFMA用のvpmadd52{h,l}uq対応
|
||||||
|
2022/10/05 ver 6.63 amx_fp16/avx_vnni_int8/avx_ne_convert対応とsetDefaultEncoding()追加
|
||||||
|
2022/09/15 ver 6.62 serialize追加
|
||||||
|
2022/08/02 ver 6.61.1 noexceptはVisual Studio 2015以降対応
|
||||||
2022/07/29 ver 6.61 movzx eax, ahがエラーになるのを修正
|
2022/07/29 ver 6.61 movzx eax, ahがエラーになるのを修正
|
||||||
2022/06/16 ver 6.60.2 GFNI, VAES, VPCLMULQDQの判定修正
|
2022/06/16 ver 6.60.2 GFNI, VAES, VPCLMULQDQの判定修正
|
||||||
2022/06/15 ver 6.60.1 Visual Studio /O0でXbyak::util::Cpuがリンクエラーになるのに対応
|
2022/06/15 ver 6.60.1 Visual Studio /O0でXbyak::util::Cpuがリンクエラーになるのに対応
|
||||||
|
|
5
externals/xbyak/sample/Makefile
vendored
5
externals/xbyak/sample/Makefile
vendored
|
@ -1,6 +1,7 @@
|
||||||
XBYAK_INC=../xbyak/xbyak.h
|
XBYAK_INC=../xbyak/xbyak.h
|
||||||
|
CXX?=g++
|
||||||
|
|
||||||
BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
|
BOOST_EXIST=$(shell echo "#include <boost/spirit/core.hpp>" | $(CXX) -x c++ -c - 2>/dev/null && echo 1)
|
||||||
UNAME_M=$(shell uname -m)
|
UNAME_M=$(shell uname -m)
|
||||||
|
|
||||||
ONLY_64BIT=0
|
ONLY_64BIT=0
|
||||||
|
@ -104,7 +105,7 @@ profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
|
||||||
$(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
$(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o $(TARGET) *.exe profiler profiler-vtune
|
rm -rf $(TARGET) profiler profiler-vtune
|
||||||
|
|
||||||
test : test0.cpp $(XBYAK_INC)
|
test : test0.cpp $(XBYAK_INC)
|
||||||
test64: test0.cpp $(XBYAK_INC)
|
test64: test0.cpp $(XBYAK_INC)
|
||||||
|
|
2
externals/xbyak/sample/quantize.cpp
vendored
2
externals/xbyak/sample/quantize.cpp
vendored
|
@ -199,7 +199,7 @@ int main(int argc, char *argv[])
|
||||||
quantize2(dest2, src, qTbl);
|
quantize2(dest2, src, qTbl);
|
||||||
for (int i = 0; i < N; i++) {
|
for (int i = 0; i < N; i++) {
|
||||||
if (dest[i] != dest2[i]) {
|
if (dest[i] != dest2[i]) {
|
||||||
printf("err[%d] %d %d\n", i, dest[i], dest2[i]);
|
printf("err[%d] %u %u\n", i, dest[i], dest2[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
7
externals/xbyak/sample/test_util.cpp
vendored
7
externals/xbyak/sample/test_util.cpp
vendored
|
@ -89,6 +89,13 @@ void putCPUinfo(bool onlyCpuidFeature)
|
||||||
{ Cpu::tMOVDIRI, "movdiri" },
|
{ Cpu::tMOVDIRI, "movdiri" },
|
||||||
{ Cpu::tMOVDIR64B, "movdir64b" },
|
{ Cpu::tMOVDIR64B, "movdir64b" },
|
||||||
{ Cpu::tCLZERO, "clzero" },
|
{ Cpu::tCLZERO, "clzero" },
|
||||||
|
{ Cpu::tAMX_FP16, "amx_fp16" },
|
||||||
|
{ Cpu::tAVX_VNNI_INT8, "avx_vnni_int8" },
|
||||||
|
{ Cpu::tAVX_NE_CONVERT, "avx_ne_convert" },
|
||||||
|
{ Cpu::tAVX_IFMA, "avx_ifma" },
|
||||||
|
{ Cpu::tRAO_INT, "rao-int" },
|
||||||
|
{ Cpu::tCMPCCXADD, "cmpccxadd" },
|
||||||
|
{ Cpu::tPREFETCHITI, "prefetchiti" },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||||
|
|
8
externals/xbyak/sample/toyvm.cpp
vendored
8
externals/xbyak/sample/toyvm.cpp
vendored
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
mem_ 4byte x 65536
|
mem_ 4byte x 65536
|
||||||
|
|
||||||
すべての命令は4byte固定
|
all instructions are fixed at 4 bytes.
|
||||||
即値は全て16bit
|
all immediate values are 16-bit.
|
||||||
|
|
||||||
R = A or B
|
R = A or B
|
||||||
vldiR, imm ; R = imm
|
vldiR, imm ; R = imm
|
||||||
|
@ -109,7 +109,7 @@ public:
|
||||||
reg[r] -= imm;
|
reg[r] -= imm;
|
||||||
break;
|
break;
|
||||||
case PUT:
|
case PUT:
|
||||||
printf("%c %8d(0x%08x)\n", 'A' + r, reg[r], reg[r]);
|
printf("%c %8u(0x%08x)\n", 'A' + r, reg[r], reg[r]);
|
||||||
break;
|
break;
|
||||||
case JNZ:
|
case JNZ:
|
||||||
if (reg[r] != 0) pc += static_cast<signed short>(imm);
|
if (reg[r] != 0) pc += static_cast<signed short>(imm);
|
||||||
|
@ -294,7 +294,7 @@ lp:
|
||||||
p = t;
|
p = t;
|
||||||
n--;
|
n--;
|
||||||
if (n != 0) goto lp;
|
if (n != 0) goto lp;
|
||||||
printf("c=%d(0x%08x)\n", c, c);
|
printf("c=%u(0x%08x)\n", c, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
|
|
49
externals/xbyak/test/Makefile
vendored
49
externals/xbyak/test/Makefile
vendored
|
@ -1,6 +1,9 @@
|
||||||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32
|
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32
|
||||||
XBYAK_INC=../xbyak/xbyak.h
|
XBYAK_INC=../xbyak/xbyak.h
|
||||||
UNAME_S=$(shell uname -s)
|
UNAME_S=$(shell uname -s)
|
||||||
|
ifeq ($(shell ./detect_x32),x32)
|
||||||
|
X32?=1
|
||||||
|
endif
|
||||||
BIT=32
|
BIT=32
|
||||||
ifeq ($(shell uname -m),x86_64)
|
ifeq ($(shell uname -m),x86_64)
|
||||||
BIT=64
|
BIT=64
|
||||||
|
@ -20,9 +23,9 @@ endif
|
||||||
|
|
||||||
all: $(TARGET)
|
all: $(TARGET)
|
||||||
|
|
||||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||||
|
|
||||||
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
CFLAGS=-O2 -Wall -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||||
make_nm:
|
make_nm:
|
||||||
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
||||||
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
||||||
|
@ -53,12 +56,11 @@ noexception: noexception.cpp ../xbyak/xbyak.h
|
||||||
test_nm: normalize_prefix $(TARGET)
|
test_nm: normalize_prefix $(TARGET)
|
||||||
$(MAKE) -C ../gen
|
$(MAKE) -C ../gen
|
||||||
ifneq ($(ONLY_64BIT),1)
|
ifneq ($(ONLY_64BIT),1)
|
||||||
./test_nm.sh
|
CXX=$(CXX) ./test_nm.sh
|
||||||
./test_nm.sh noexcept
|
CXX=$(CXX) ./test_nm.sh noexcept
|
||||||
./noexception
|
CXX=$(CXX) ./test_nm.sh Y
|
||||||
./test_nm.sh Y
|
CXX=$(CXX) ./test_nm.sh avx512
|
||||||
./test_nm.sh avx512
|
CXX=$(CXX) ./test_address.sh
|
||||||
./test_address.sh
|
|
||||||
./jmp
|
./jmp
|
||||||
./cvt_test32
|
./cvt_test32
|
||||||
endif
|
endif
|
||||||
|
@ -67,32 +69,38 @@ endif
|
||||||
./misc32
|
./misc32
|
||||||
./cvt_test
|
./cvt_test
|
||||||
ifeq ($(BIT),64)
|
ifeq ($(BIT),64)
|
||||||
./test_address.sh 64
|
CXX=$(CXX) ./test_address.sh 64
|
||||||
./test_nm.sh 64
|
ifneq ($(X32),1)
|
||||||
./test_nm.sh Y64
|
CXX=$(CXX) ./test_nm.sh 64
|
||||||
|
CXX=$(CXX) ./test_nm.sh Y64
|
||||||
|
endif
|
||||||
./jmp64
|
./jmp64
|
||||||
endif
|
endif
|
||||||
|
|
||||||
test_avx: normalize_prefix
|
test_avx: normalize_prefix
|
||||||
ifneq ($(ONLY_64BIT),0)
|
ifneq ($(ONLY_64BIT),0)
|
||||||
./test_avx.sh
|
CXX=$(CXX) ./test_avx.sh
|
||||||
./test_avx.sh Y
|
CXX=$(CXX) ./test_avx.sh Y
|
||||||
endif
|
endif
|
||||||
ifeq ($(BIT),64)
|
ifeq ($(BIT),64)
|
||||||
./test_address.sh 64
|
CXX=$(CXX) ./test_avx.sh 64
|
||||||
./test_avx.sh 64
|
ifneq ($(X32),1)
|
||||||
./test_avx.sh Y64
|
CXX=$(CXX) ./test_avx.sh Y64
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
test_avx512: normalize_prefix
|
test_avx512: normalize_prefix
|
||||||
ifneq ($(ONLY_64BIT),0)
|
ifneq ($(ONLY_64BIT),0)
|
||||||
./test_avx512.sh
|
CXX=$(CXX) ./test_avx512.sh
|
||||||
endif
|
endif
|
||||||
ifeq ($(BIT),64)
|
ifeq ($(BIT),64)
|
||||||
./test_avx512.sh 64
|
CXX=$(CXX) ./test_avx512.sh 64
|
||||||
endif
|
endif
|
||||||
|
|
||||||
test:
|
detect_x32: detect_x32.c
|
||||||
|
$(CC) $< -o $@
|
||||||
|
|
||||||
|
test: detect_x32
|
||||||
$(MAKE) test_nm
|
$(MAKE) test_nm
|
||||||
$(MAKE) test_avx
|
$(MAKE) test_avx
|
||||||
$(MAKE) test_avx512
|
$(MAKE) test_avx512
|
||||||
|
@ -104,4 +112,3 @@ lib_run: lib_test.cpp lib_run.cpp lib.h
|
||||||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||||
make_nm: make_nm.cpp $(XBYAK_INC)
|
make_nm: make_nm.cpp $(XBYAK_INC)
|
||||||
|
|
||||||
|
|
||||||
|
|
2
externals/xbyak/test/Makefile.win
vendored
2
externals/xbyak/test/Makefile.win
vendored
|
@ -1,4 +1,4 @@
|
||||||
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS
|
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS -I ../
|
||||||
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
|
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
|
||||||
../gen/gen_code.exe > $@
|
../gen/gen_code.exe > $@
|
||||||
../gen/gen_avx512.exe >> $@
|
../gen/gen_avx512.exe >> $@
|
||||||
|
|
8
externals/xbyak/test/detect_x32.c
vendored
Normal file
8
externals/xbyak/test/detect_x32.c
vendored
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
#if defined(__x86_64__) && defined(__ILP32__)
|
||||||
|
puts("x32");
|
||||||
|
#endif
|
||||||
|
}
|
54
externals/xbyak/test/make_512.cpp
vendored
54
externals/xbyak/test/make_512.cpp
vendored
|
@ -1807,44 +1807,44 @@ public:
|
||||||
put("vpmovd2m", K, _XMM | _YMM | _ZMM);
|
put("vpmovd2m", K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovq2m", K, _XMM | _YMM | _ZMM);
|
put("vpmovq2m", K, _XMM | _YMM | _ZMM);
|
||||||
|
|
||||||
put("vpmovqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovsqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovsqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovusqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovusqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
|
|
||||||
put("vpmovqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovsqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovsqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovusqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovusqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
|
|
||||||
put("vpmovqd", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovqd", YMM_KZ | _MEM, _ZMM);
|
put("vpmovqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovsqd", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovsqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovsqd", YMM_KZ | _MEM, _ZMM);
|
put("vpmovsqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovusqd", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovusqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovusqd", YMM_KZ | _MEM, _ZMM);
|
put("vpmovusqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovsdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovsdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovusdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovusdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
|
|
||||||
put("vpmovdw", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovdw", YMM_KZ | _MEM, _ZMM);
|
put("vpmovdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovsdw", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovsdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovsdw", YMM_KZ | _MEM, _ZMM);
|
put("vpmovsdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovusdw", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovusdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovusdw", YMM_KZ | _MEM, _ZMM);
|
put("vpmovusdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovwb", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovwb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovwb", YMM_KZ | _MEM, _ZMM);
|
put("vpmovwb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovswb", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovswb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovswb", YMM_KZ | _MEM, _ZMM);
|
put("vpmovswb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovuswb", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovuswb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovuswb", YMM_KZ | _MEM, _ZMM);
|
put("vpmovuswb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
}
|
}
|
||||||
void putRot()
|
void putRot()
|
||||||
{
|
{
|
||||||
|
|
13
externals/xbyak/test/make_nm.cpp
vendored
13
externals/xbyak/test/make_nm.cpp
vendored
|
@ -533,6 +533,7 @@ class Test {
|
||||||
"nop",
|
"nop",
|
||||||
|
|
||||||
"sahf",
|
"sahf",
|
||||||
|
"serialize",
|
||||||
"stc",
|
"stc",
|
||||||
"std",
|
"std",
|
||||||
"sti",
|
"sti",
|
||||||
|
@ -1017,9 +1018,7 @@ class Test {
|
||||||
}
|
}
|
||||||
void putCmov() const
|
void putCmov() const
|
||||||
{
|
{
|
||||||
const struct {
|
const char tbl[][4] = {
|
||||||
const char *s;
|
|
||||||
} tbl[] = {
|
|
||||||
"o",
|
"o",
|
||||||
"no",
|
"no",
|
||||||
"b",
|
"b",
|
||||||
|
@ -1053,11 +1052,11 @@ class Test {
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
char buf[32];
|
char buf[32];
|
||||||
snprintf(buf, sizeof(buf), "cmov%s", tbl[i].s);
|
snprintf(buf, sizeof(buf), "cmov%s", tbl[i]);
|
||||||
put(buf, REG16, REG16|MEM);
|
put(buf, REG16, REG16|MEM);
|
||||||
put(buf, REG32, REG32|MEM);
|
put(buf, REG32, REG32|MEM);
|
||||||
put(buf, REG64, REG64|MEM);
|
put(buf, REG64, REG64|MEM);
|
||||||
snprintf(buf, sizeof(buf), "set%s", tbl[i].s);
|
snprintf(buf, sizeof(buf), "set%s", tbl[i]);
|
||||||
put(buf, REG8|REG8_3|MEM);
|
put(buf, REG8|REG8_3|MEM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1294,7 +1293,7 @@ class Test {
|
||||||
put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
|
put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
|
||||||
put("movbe", REG16|REG32e, MEM);
|
put("movbe", REG16|REG32e, MEM);
|
||||||
put("movbe", MEM, REG16|REG32e);
|
put("movbe", MEM, REG16|REG32e);
|
||||||
#ifdef XBYAK64
|
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||||
put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]");
|
put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]");
|
||||||
put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL);
|
put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL);
|
||||||
put(p, "qword [rax], 0");
|
put(p, "qword [rax], 0");
|
||||||
|
@ -2608,7 +2607,7 @@ public:
|
||||||
putMPX();
|
putMPX();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef XBYAK64
|
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||||
|
|
||||||
#ifdef USE_YASM
|
#ifdef USE_YASM
|
||||||
putRip();
|
putRip();
|
||||||
|
|
189
externals/xbyak/test/misc.cpp
vendored
189
externals/xbyak/test/misc.cpp
vendored
|
@ -5,6 +5,7 @@
|
||||||
#include <xbyak/xbyak_util.h>
|
#include <xbyak/xbyak_util.h>
|
||||||
#include <cybozu/inttype.hpp>
|
#include <cybozu/inttype.hpp>
|
||||||
#include <cybozu/test.hpp>
|
#include <cybozu/test.hpp>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
|
|
||||||
|
@ -97,13 +98,17 @@ CYBOZU_TEST_AUTO(mov_const)
|
||||||
}
|
}
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x7fffffff]));
|
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x7fffffff]));
|
||||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x17fffffff]), Xbyak::Error);
|
if (sizeof(void*) != 4) { // sizeof(void*) == 4 on x32
|
||||||
|
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x17fffffff]), Xbyak::Error);
|
||||||
|
}
|
||||||
#ifdef XBYAK_OLD_DISP_CHECK
|
#ifdef XBYAK_OLD_DISP_CHECK
|
||||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x80000000]));
|
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x80000000]));
|
||||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0xffffffff]));
|
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0xffffffff]));
|
||||||
#else
|
#else
|
||||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x80000000ull]), Xbyak::Error);
|
if (sizeof(void*) != 4) { // sizeof(void*) == 4 on x32
|
||||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0xffffffffull]), Xbyak::Error);
|
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x80000000ull]), Xbyak::Error);
|
||||||
|
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0xffffffffull]), Xbyak::Error);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -875,6 +880,10 @@ CYBOZU_TEST_AUTO(vnni)
|
||||||
vpdpbusd(xm0, xm1, xm2);
|
vpdpbusd(xm0, xm1, xm2);
|
||||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX
|
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX
|
||||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX
|
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX
|
||||||
|
setDefaultEncoding(VexEncoding);
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // VEX
|
||||||
|
setDefaultEncoding(EvexEncoding);
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // EVEX
|
||||||
}
|
}
|
||||||
void badVex()
|
void badVex()
|
||||||
{
|
{
|
||||||
|
@ -885,6 +894,8 @@ CYBOZU_TEST_AUTO(vnni)
|
||||||
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||||
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||||
0xC4, 0xE2, 0x71, 0x50, 0xC2,
|
0xC4, 0xE2, 0x71, 0x50, 0xC2,
|
||||||
|
0xC4, 0xE2, 0x71, 0x50, 0xC2,
|
||||||
|
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||||
};
|
};
|
||||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
@ -1975,3 +1986,175 @@ CYBOZU_TEST_AUTO(cpu)
|
||||||
Cpu cpu;
|
Cpu cpu;
|
||||||
CYBOZU_TEST_EQUAL(cpu.has(Cpu::tINTEL) && cpu.has(Cpu::tAMD), cpu.has(Cpu::tINTEL | Cpu::tAMD));
|
CYBOZU_TEST_EQUAL(cpu.has(Cpu::tINTEL) && cpu.has(Cpu::tAMD), cpu.has(Cpu::tINTEL | Cpu::tAMD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(minmax)
|
||||||
|
{
|
||||||
|
using namespace Xbyak::util;
|
||||||
|
CYBOZU_TEST_EQUAL((std::min)(3, 4), local::min_(3, 4));
|
||||||
|
CYBOZU_TEST_EQUAL((std::max)(3, 4), local::max_(3, 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(rao_int)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
#ifdef XBYAK64
|
||||||
|
aadd(ptr[rax], ecx);
|
||||||
|
aadd(ptr[eax], ecx);
|
||||||
|
aadd(ptr[rax], r10);
|
||||||
|
aand(ptr[rax], ecx);
|
||||||
|
aand(ptr[eax], ecx);
|
||||||
|
aand(ptr[rax], r10);
|
||||||
|
aor(ptr[rax], ecx);
|
||||||
|
aor(ptr[eax], ecx);
|
||||||
|
aor(ptr[rax], r10);
|
||||||
|
axor(ptr[rax], ecx);
|
||||||
|
axor(ptr[eax], ecx);
|
||||||
|
axor(ptr[rax], r10);
|
||||||
|
#else
|
||||||
|
aadd(ptr[eax], ecx);
|
||||||
|
aand(ptr[eax], ecx);
|
||||||
|
aor(ptr[eax], ecx);
|
||||||
|
axor(ptr[eax], ecx);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
#ifdef XBYAK64
|
||||||
|
// aadd
|
||||||
|
0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||||
|
|
||||||
|
// aand
|
||||||
|
0x66, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0x66, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0x66, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||||
|
|
||||||
|
// aor
|
||||||
|
0xf2, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0xf2, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0xf2, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||||
|
|
||||||
|
// axor
|
||||||
|
0xf3, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0xf3, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0xf3, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||||
|
#else
|
||||||
|
// aadd
|
||||||
|
0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
// aand
|
||||||
|
0x66, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
// aor
|
||||||
|
0xf2, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
// axor
|
||||||
|
0xf3, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef XBYAK64
|
||||||
|
CYBOZU_TEST_AUTO(CMPccXADD)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
// 32bit reg
|
||||||
|
cmpbexadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpbxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmplexadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmplxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnbexadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnbxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnlexadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnlxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnoxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnpxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnsxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnzxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpoxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmppxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpsxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpzxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
// 64bit reg
|
||||||
|
cmpbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmplexadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmplxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmppxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
// 32bit reg
|
||||||
|
0xc4, 0xa2, 0x69, 0xe6, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe2, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xee, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xec, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe7, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe3, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xef, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xed, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe1, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xeb, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe9, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe5, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe0, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xea, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe8, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe4, 0x0c, 0x90,
|
||||||
|
// 64bit reg
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe6, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe2, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xee, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xec, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe7, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe3, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xef, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xed, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe1, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xeb, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe9, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe5, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe0, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xea, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe8, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe4, 0x0c, 0x90,
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(prefetchiti)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
prefetchit0(ptr[rax]);
|
||||||
|
prefetchit1(ptr[rax]);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
0x0f, 0x18, 0x38,
|
||||||
|
0x0f, 0x18, 0x30
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
2
externals/xbyak/test/noexception.cpp
vendored
2
externals/xbyak/test/noexception.cpp
vendored
|
@ -56,7 +56,7 @@ void test2()
|
||||||
void test3()
|
void test3()
|
||||||
{
|
{
|
||||||
static struct EmptyAllocator : Xbyak::Allocator {
|
static struct EmptyAllocator : Xbyak::Allocator {
|
||||||
uint8_t *alloc() { return 0; }
|
uint8_t *alloc(size_t) { return 0; }
|
||||||
} emptyAllocator;
|
} emptyAllocator;
|
||||||
struct Code : CodeGenerator {
|
struct Code : CodeGenerator {
|
||||||
Code() : CodeGenerator(8, 0, &emptyAllocator)
|
Code() : CodeGenerator(8, 0, &emptyAllocator)
|
||||||
|
|
10
externals/xbyak/test/test_address.sh
vendored
10
externals/xbyak/test/test_address.sh
vendored
|
@ -1,13 +1,17 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
FILTER="grep -v warning"
|
FILTER="grep -v warning"
|
||||||
|
|
||||||
sub()
|
sub()
|
||||||
{
|
{
|
||||||
|
|
||||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
CFLAGS="-Wall -I../ $OPT2"
|
||||||
|
CXX=${CXX:=g++}
|
||||||
|
|
||||||
echo "compile address.cpp"
|
echo "compile address.cpp"
|
||||||
g++ $CFLAGS address.cpp -o address
|
$CXX $CFLAGS address.cpp -o address
|
||||||
|
|
||||||
./address $1 > a.asm
|
./address $1 > a.asm
|
||||||
echo "asm"
|
echo "asm"
|
||||||
|
@ -17,7 +21,7 @@ awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
||||||
echo "xbyak"
|
echo "xbyak"
|
||||||
./address $1 jit > nm.cpp
|
./address $1 jit > nm.cpp
|
||||||
echo "compile nm_frame.cpp"
|
echo "compile nm_frame.cpp"
|
||||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||||
./nm_frame > x.lst
|
./nm_frame > x.lst
|
||||||
diff ok.lst x.lst && echo "ok"
|
diff ok.lst x.lst && echo "ok"
|
||||||
|
|
||||||
|
|
9
externals/xbyak/test/test_avx.sh
vendored
9
externals/xbyak/test/test_avx.sh
vendored
|
@ -1,6 +1,9 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
FILTER="grep -v warning"
|
FILTER="grep -v warning"
|
||||||
|
CXX=${CXX:=g++}
|
||||||
|
|
||||||
case $1 in
|
case $1 in
|
||||||
Y)
|
Y)
|
||||||
|
@ -31,9 +34,9 @@ Y64)
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
|
CFLAGS="-Wall -I../ $OPT2 -DUSE_AVX"
|
||||||
echo "compile make_nm.cpp"
|
echo "compile make_nm.cpp"
|
||||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
$CXX $CFLAGS make_nm.cpp -o make_nm
|
||||||
|
|
||||||
./make_nm > a.asm
|
./make_nm > a.asm
|
||||||
echo "asm"
|
echo "asm"
|
||||||
|
@ -43,6 +46,6 @@ awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER
|
||||||
echo "xbyak"
|
echo "xbyak"
|
||||||
./make_nm jit > nm.cpp
|
./make_nm jit > nm.cpp
|
||||||
echo "compile nm_frame.cpp"
|
echo "compile nm_frame.cpp"
|
||||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||||
./nm_frame | $FILTER > x.lst
|
./nm_frame | $FILTER > x.lst
|
||||||
diff -B ok.lst x.lst && echo "ok"
|
diff -B ok.lst x.lst && echo "ok"
|
||||||
|
|
9
externals/xbyak/test/test_avx512.sh
vendored
9
externals/xbyak/test/test_avx512.sh
vendored
|
@ -1,6 +1,9 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
FILTER="grep -v warning"
|
FILTER="grep -v warning"
|
||||||
|
CXX=${CXX:=g++}
|
||||||
|
|
||||||
case $1 in
|
case $1 in
|
||||||
64)
|
64)
|
||||||
|
@ -18,9 +21,9 @@ case $1 in
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
|
CFLAGS="-Wall -I../ $OPT2 -DUSE_AVX512"
|
||||||
echo "compile make_512.cpp"
|
echo "compile make_512.cpp"
|
||||||
g++ $CFLAGS make_512.cpp -o make_512
|
$CXX $CFLAGS make_512.cpp -o make_512
|
||||||
|
|
||||||
./make_512 > a.asm
|
./make_512 > a.asm
|
||||||
echo "asm"
|
echo "asm"
|
||||||
|
@ -30,6 +33,6 @@ awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
||||||
echo "xbyak"
|
echo "xbyak"
|
||||||
./make_512 jit > nm.cpp
|
./make_512 jit > nm.cpp
|
||||||
echo "compile nm_frame.cpp"
|
echo "compile nm_frame.cpp"
|
||||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
||||||
./nm_frame | $FILTER > x.lst
|
./nm_frame | $FILTER > x.lst
|
||||||
diff -B ok.lst x.lst && echo "ok"
|
diff -B ok.lst x.lst && echo "ok"
|
||||||
|
|
9
externals/xbyak/test/test_nm.sh
vendored
9
externals/xbyak/test/test_nm.sh
vendored
|
@ -1,6 +1,9 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
FILTER=cat
|
FILTER=cat
|
||||||
|
CXX=${CXX:=g++}
|
||||||
|
|
||||||
case $1 in
|
case $1 in
|
||||||
Y)
|
Y)
|
||||||
|
@ -44,9 +47,9 @@ noexcept)
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
CFLAGS="-Wall -I../ $OPT2"
|
||||||
echo "compile make_nm.cpp with $CFLAGS"
|
echo "compile make_nm.cpp with $CFLAGS"
|
||||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
$CXX $CFLAGS make_nm.cpp -o make_nm
|
||||||
|
|
||||||
./make_nm > a.asm
|
./make_nm > a.asm
|
||||||
echo "asm"
|
echo "asm"
|
||||||
|
@ -56,6 +59,6 @@ awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER
|
||||||
echo "xbyak"
|
echo "xbyak"
|
||||||
./make_nm jit > nm.cpp
|
./make_nm jit > nm.cpp
|
||||||
echo "compile nm_frame.cpp"
|
echo "compile nm_frame.cpp"
|
||||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||||
./nm_frame | $FILTER > x.lst
|
./nm_frame | $FILTER > x.lst
|
||||||
diff -B ok.lst x.lst && echo "ok"
|
diff -B ok.lst x.lst && echo "ok"
|
||||||
|
|
37
externals/xbyak/xbyak/xbyak.h
vendored
37
externals/xbyak/xbyak/xbyak.h
vendored
|
@ -118,7 +118,7 @@
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1800)
|
#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1900)
|
||||||
#undef XBYAK_TLS
|
#undef XBYAK_TLS
|
||||||
#define XBYAK_TLS thread_local
|
#define XBYAK_TLS thread_local
|
||||||
#define XBYAK_VARIADIC_TEMPLATE
|
#define XBYAK_VARIADIC_TEMPLATE
|
||||||
|
@ -144,11 +144,18 @@
|
||||||
#pragma warning(disable : 4127) /* constant expresison */
|
#pragma warning(disable : 4127) /* constant expresison */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// disable -Warray-bounds because it may be a bug of gcc. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104603
|
||||||
|
#if defined(__GNUC__) && !defined(__clang__)
|
||||||
|
#define XBYAK_DISABLE_WARNING_ARRAY_BOUNDS
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Xbyak {
|
namespace Xbyak {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x6610 /* 0xABCD = A.BC(.D) */
|
VERSION = 0x6680 /* 0xABCD = A.BC(.D) */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
|
@ -371,7 +378,7 @@ inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0
|
||||||
|
|
||||||
inline uint32_t VerifyInInt32(uint64_t x)
|
inline uint32_t VerifyInInt32(uint64_t x)
|
||||||
{
|
{
|
||||||
#ifdef XBYAK64
|
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||||
if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
|
if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
|
||||||
#endif
|
#endif
|
||||||
return static_cast<uint32_t>(x);
|
return static_cast<uint32_t>(x);
|
||||||
|
@ -1478,7 +1485,6 @@ public:
|
||||||
clabelDefList_.clear();
|
clabelDefList_.clear();
|
||||||
clabelUndefList_.clear();
|
clabelUndefList_.clear();
|
||||||
resetLabelPtrList();
|
resetLabelPtrList();
|
||||||
ClearError();
|
|
||||||
}
|
}
|
||||||
void enterLocal()
|
void enterLocal()
|
||||||
{
|
{
|
||||||
|
@ -1820,7 +1826,7 @@ private:
|
||||||
void setSIB(const RegExp& e, int reg, int disp8N = 0)
|
void setSIB(const RegExp& e, int reg, int disp8N = 0)
|
||||||
{
|
{
|
||||||
uint64_t disp64 = e.getDisp();
|
uint64_t disp64 = e.getDisp();
|
||||||
#ifdef XBYAK64
|
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||||
#ifdef XBYAK_OLD_DISP_CHECK
|
#ifdef XBYAK_OLD_DISP_CHECK
|
||||||
// treat 0xffffffff as 0xffffffffffffffff
|
// treat 0xffffffff as 0xffffffffffffffff
|
||||||
uint64_t high = disp64 >> 32;
|
uint64_t high = disp64 >> 32;
|
||||||
|
@ -2412,18 +2418,21 @@ private:
|
||||||
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
||||||
opVex(x, 0, addr, type, code);
|
opVex(x, 0, addr, type, code);
|
||||||
}
|
}
|
||||||
void opVnni(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
|
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
|
||||||
{
|
{
|
||||||
|
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code0);
|
||||||
|
}
|
||||||
|
int orEvexIf(PreferredEncoding encoding) {
|
||||||
if (encoding == DefaultEncoding) {
|
if (encoding == DefaultEncoding) {
|
||||||
encoding = EvexEncoding;
|
encoding = defaultEncoding_;
|
||||||
}
|
}
|
||||||
if (encoding == EvexEncoding) {
|
if (encoding == EvexEncoding) {
|
||||||
#ifdef XBYAK_DISABLE_AVX512
|
#ifdef XBYAK_DISABLE_AVX512
|
||||||
XBYAK_THROW(ERR_EVEX_IS_INVALID)
|
XBYAK_THROW(ERR_EVEX_IS_INVALID)
|
||||||
#endif
|
#endif
|
||||||
type |= T_MUST_EVEX;
|
return T_MUST_EVEX;
|
||||||
}
|
}
|
||||||
opAVX_X_X_XM(x1, x2, op, type, code0);
|
return 0;
|
||||||
}
|
}
|
||||||
void opInOut(const Reg& a, const Reg& d, uint8_t code)
|
void opInOut(const Reg& a, const Reg& d, uint8_t code)
|
||||||
{
|
{
|
||||||
|
@ -2508,6 +2517,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
private:
|
private:
|
||||||
bool isDefaultJmpNEAR_;
|
bool isDefaultJmpNEAR_;
|
||||||
|
PreferredEncoding defaultEncoding_;
|
||||||
public:
|
public:
|
||||||
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
||||||
void L(Label& label) { labelMgr_.defineClabel(label); }
|
void L(Label& label) { labelMgr_.defineClabel(label); }
|
||||||
|
@ -2787,11 +2797,13 @@ public:
|
||||||
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
|
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
|
||||||
#endif
|
#endif
|
||||||
, isDefaultJmpNEAR_(false)
|
, isDefaultJmpNEAR_(false)
|
||||||
|
, defaultEncoding_(EvexEncoding)
|
||||||
{
|
{
|
||||||
labelMgr_.set(this);
|
labelMgr_.set(this);
|
||||||
}
|
}
|
||||||
void reset()
|
void reset()
|
||||||
{
|
{
|
||||||
|
ClearError();
|
||||||
resetSize();
|
resetSize();
|
||||||
labelMgr_.reset();
|
labelMgr_.reset();
|
||||||
labelMgr_.set(this);
|
labelMgr_.set(this);
|
||||||
|
@ -2823,6 +2835,9 @@ public:
|
||||||
#undef jnl
|
#undef jnl
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// set default encoding to select Vex or Evex
|
||||||
|
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
|
||||||
|
|
||||||
/*
|
/*
|
||||||
use single byte nop if useMultiByteNop = false
|
use single byte nop if useMultiByteNop = false
|
||||||
*/
|
*/
|
||||||
|
@ -2927,6 +2942,10 @@ static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segmen
|
||||||
#pragma warning(pop)
|
#pragma warning(pop)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && !defined(__clang__)
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
} // end of namespace
|
} // end of namespace
|
||||||
|
|
||||||
#endif // XBYAK_XBYAK_H_
|
#endif // XBYAK_XBYAK_H_
|
||||||
|
|
88
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
88
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
|
@ -1,4 +1,6 @@
|
||||||
const char *getVersionString() const { return "6.61"; }
|
const char *getVersionString() const { return "6.68"; }
|
||||||
|
void aadd(const Address& addr, const Reg32e ®) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||||
|
void aand(const Address& addr, const Reg32e ®) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||||
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
||||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||||
|
@ -24,6 +26,8 @@ void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXM
|
||||||
void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
|
void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
|
||||||
void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
|
void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
|
||||||
void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
|
void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
|
||||||
|
void aor(const Address& addr, const Reg32e ®) { db(0xF2); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||||
|
void axor(const Address& addr, const Reg32e ®) { db(0xF3); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||||
void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); }
|
void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); }
|
||||||
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||||
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||||
|
@ -654,6 +658,8 @@ void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); }
|
||||||
void popcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); }
|
void popcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); }
|
||||||
void popf() { db(0x9D); }
|
void popf() { db(0x9D); }
|
||||||
void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); }
|
void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); }
|
||||||
|
void prefetchit0(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0x18); }
|
||||||
|
void prefetchit1(const Address& addr) { opModM(addr, Reg32(6), 0x0F, 0x18); }
|
||||||
void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, 0x18); }
|
void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, 0x18); }
|
||||||
void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x18); }
|
void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x18); }
|
||||||
void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x18); }
|
void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x18); }
|
||||||
|
@ -747,6 +753,7 @@ void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
|
||||||
void scasb() { db(0xAE); }
|
void scasb() { db(0xAE); }
|
||||||
void scasd() { db(0xAF); }
|
void scasd() { db(0xAF); }
|
||||||
void scasw() { db(0x66); db(0xAF); }
|
void scasw() { db(0x66); db(0xAF); }
|
||||||
|
void serialize() { db(0x0F); db(0x01); db(0xE8); }
|
||||||
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
|
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
|
||||||
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
|
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
|
||||||
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
|
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
|
||||||
|
@ -844,6 +851,8 @@ void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
||||||
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
|
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
|
||||||
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
|
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
|
||||||
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
|
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
|
||||||
|
void vbcstnebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM | T_B16, 0xB1); }
|
||||||
|
void vbcstnesh2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM | T_B16, 0xB1); }
|
||||||
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
|
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
|
||||||
void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
|
void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
|
||||||
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); }
|
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); }
|
||||||
|
@ -988,6 +997,11 @@ void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T
|
||||||
void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2F); }
|
void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2F); }
|
||||||
void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
|
void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
|
||||||
void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
|
void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
|
||||||
|
void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||||
|
void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||||
|
void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||||
|
void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||||
|
void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }
|
||||||
void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
|
void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
|
||||||
void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
|
void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
|
||||||
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
|
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
|
||||||
|
@ -1191,10 +1205,16 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
|
||||||
void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
|
void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
|
||||||
void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
|
void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
|
||||||
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
|
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
|
||||||
void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); }
|
void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||||
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); }
|
void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||||
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
|
void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||||
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
|
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||||
|
void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); }
|
||||||
|
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); }
|
||||||
|
void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||||
|
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||||
|
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
|
||||||
|
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
|
||||||
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
||||||
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
||||||
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
|
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
|
||||||
|
@ -1226,6 +1246,8 @@ void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if
|
||||||
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
|
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
|
||||||
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
|
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
|
||||||
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
|
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
|
||||||
|
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_B64, 0xB5, encoding); }
|
||||||
|
void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_B64, 0xB4, encoding); }
|
||||||
void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
|
void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
|
||||||
void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
|
void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
|
||||||
void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
|
void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
|
||||||
|
@ -1642,6 +1664,22 @@ void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx())
|
||||||
void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }
|
void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }
|
||||||
void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
|
void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
|
||||||
void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }
|
void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }
|
||||||
|
void cmpbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE6, false); }
|
||||||
|
void cmpbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE2, false); }
|
||||||
|
void cmplexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEE, false); }
|
||||||
|
void cmplxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEC, false); }
|
||||||
|
void cmpnbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE7, false); }
|
||||||
|
void cmpnbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE3, false); }
|
||||||
|
void cmpnlexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEF, false); }
|
||||||
|
void cmpnlxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xED, false); }
|
||||||
|
void cmpnoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE1, false); }
|
||||||
|
void cmpnpxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEB, false); }
|
||||||
|
void cmpnsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE9, false); }
|
||||||
|
void cmpnzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE5, false); }
|
||||||
|
void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE0, false); }
|
||||||
|
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEA, false); }
|
||||||
|
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE8, false); }
|
||||||
|
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE4, false); }
|
||||||
void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
|
void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
|
||||||
void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
|
void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
|
||||||
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
|
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
|
||||||
|
@ -1653,6 +1691,7 @@ void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T
|
||||||
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
|
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
|
||||||
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
|
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
|
||||||
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
|
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
|
||||||
|
void tdpfp16ps(const Tmm &x1, const Tmm &x2, const Tmm &x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); }
|
||||||
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
|
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
|
||||||
#else
|
#else
|
||||||
void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
||||||
|
@ -1907,7 +1946,6 @@ void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 |
|
||||||
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||||
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
|
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
|
||||||
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||||
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
|
||||||
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5A); }
|
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5A); }
|
||||||
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
||||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||||
|
@ -2141,38 +2179,36 @@ void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T
|
||||||
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
|
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
|
||||||
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
|
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
|
||||||
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
|
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
|
||||||
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
|
|
||||||
void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB4); }
|
|
||||||
void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3D); }
|
void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3D); }
|
||||||
void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3F); }
|
void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3F); }
|
||||||
void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x39); }
|
void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x39); }
|
||||||
void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3B); }
|
void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3B); }
|
||||||
void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }
|
void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }
|
||||||
void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }
|
void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }
|
||||||
void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x31, false); }
|
void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x31, false); }
|
||||||
void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x33, true); }
|
void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x33, true); }
|
||||||
void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }
|
void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }
|
||||||
void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }
|
void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }
|
||||||
void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }
|
void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }
|
||||||
void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }
|
void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }
|
||||||
void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }
|
void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }
|
||||||
void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x32, false); }
|
void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x32, false); }
|
||||||
void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x35, true); }
|
void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x35, true); }
|
||||||
void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x34, false); }
|
void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x34, false); }
|
||||||
void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x21, false); }
|
void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x21, false); }
|
||||||
void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x23, true); }
|
void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x23, true); }
|
||||||
void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x22, false); }
|
void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x22, false); }
|
||||||
void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x25, true); }
|
void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x25, true); }
|
||||||
void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x24, false); }
|
void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x24, false); }
|
||||||
void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x20, true); }
|
void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x20, true); }
|
||||||
void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x11, false); }
|
void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x11, false); }
|
||||||
void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x13, true); }
|
void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x13, true); }
|
||||||
void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x12, false); }
|
void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x12, false); }
|
||||||
void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x15, true); }
|
void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x15, true); }
|
||||||
void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x14, false); }
|
void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x14, false); }
|
||||||
void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x10, true); }
|
void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x10, true); }
|
||||||
void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }
|
void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }
|
||||||
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x30, true); }
|
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x30, true); }
|
||||||
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
||||||
void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
|
void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
|
||||||
void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
|
void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
|
||||||
|
|
32
externals/xbyak/xbyak/xbyak_util.h
vendored
32
externals/xbyak/xbyak/xbyak_util.h
vendored
|
@ -4,7 +4,6 @@
|
||||||
#ifdef XBYAK_ONLY_CLASS_CPU
|
#ifdef XBYAK_ONLY_CLASS_CPU
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <algorithm>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#ifndef XBYAK_THROW
|
#ifndef XBYAK_THROW
|
||||||
#define XBYAK_THROW(x) ;
|
#define XBYAK_THROW(x) ;
|
||||||
|
@ -96,6 +95,11 @@ struct TypeT {
|
||||||
template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2>
|
template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2>
|
||||||
TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
inline T max_(T x, T y) { return x >= y ? x : y; }
|
||||||
|
template<typename T>
|
||||||
|
inline T min_(T x, T y) { return x < y ? x : y; }
|
||||||
|
|
||||||
} // local
|
} // local
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -193,8 +197,8 @@ private:
|
||||||
/*
|
/*
|
||||||
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
|
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
|
||||||
*/
|
*/
|
||||||
numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]);
|
numCores_[SmtLevel - 1] = local::max_(1u, numCores_[SmtLevel - 1]);
|
||||||
numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
|
numCores_[CoreLevel - 1] = local::max_(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
Failed to determine num of cores without x2APIC support.
|
Failed to determine num of cores without x2APIC support.
|
||||||
|
@ -237,7 +241,7 @@ private:
|
||||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||||
uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
actual_logical_cores = local::min_(actual_logical_cores, logical_cores);
|
||||||
}
|
}
|
||||||
assert(actual_logical_cores != 0);
|
assert(actual_logical_cores != 0);
|
||||||
dataCacheSize_[dataCacheLevels_] =
|
dataCacheSize_[dataCacheLevels_] =
|
||||||
|
@ -247,7 +251,7 @@ private:
|
||||||
* (data[2] + 1);
|
* (data[2] + 1);
|
||||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||||
assert(smt_width != 0);
|
assert(smt_width != 0);
|
||||||
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
|
coresSharignDataCache_[dataCacheLevels_] = local::max_(actual_logical_cores / smt_width, 1u);
|
||||||
dataCacheLevels_++;
|
dataCacheLevels_++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -302,7 +306,7 @@ public:
|
||||||
static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4])
|
static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4])
|
||||||
{
|
{
|
||||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
#ifdef _MSC_VER
|
#ifdef _WIN32
|
||||||
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
||||||
#else
|
#else
|
||||||
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
||||||
|
@ -406,6 +410,13 @@ public:
|
||||||
XBYAK_DEFINE_TYPE(65, tMOVDIRI);
|
XBYAK_DEFINE_TYPE(65, tMOVDIRI);
|
||||||
XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
|
XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
|
||||||
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
|
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
|
||||||
|
XBYAK_DEFINE_TYPE(68, tAMX_FP16);
|
||||||
|
XBYAK_DEFINE_TYPE(69, tAVX_VNNI_INT8);
|
||||||
|
XBYAK_DEFINE_TYPE(70, tAVX_NE_CONVERT);
|
||||||
|
XBYAK_DEFINE_TYPE(71, tAVX_IFMA);
|
||||||
|
XBYAK_DEFINE_TYPE(72, tRAO_INT);
|
||||||
|
XBYAK_DEFINE_TYPE(73, tCMPCCXADD);
|
||||||
|
XBYAK_DEFINE_TYPE(74, tPREFETCHITI);
|
||||||
|
|
||||||
#undef XBYAK_SPLIT_ID
|
#undef XBYAK_SPLIT_ID
|
||||||
#undef XBYAK_DEFINE_TYPE
|
#undef XBYAK_DEFINE_TYPE
|
||||||
|
@ -545,10 +556,17 @@ public:
|
||||||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||||
if (maxNumSubLeaves >= 1) {
|
if (maxNumSubLeaves >= 1) {
|
||||||
getCpuidEx(7, 1, data);
|
getCpuidEx(7, 1, data);
|
||||||
|
if (EAX & (1U << 3)) type_ |= tRAO_INT;
|
||||||
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
|
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
|
||||||
if (type_ & tAVX512F) {
|
if (type_ & tAVX512F) {
|
||||||
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
||||||
}
|
}
|
||||||
|
if (EAX & (1U << 7)) type_ |= tCMPCCXADD;
|
||||||
|
if (EAX & (1U << 21)) type_ |= tAMX_FP16;
|
||||||
|
if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
|
||||||
|
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
|
||||||
|
if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
|
||||||
|
if (EDX & (1U << 14)) type_ |= tPREFETCHITI;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setFamily();
|
setFamily();
|
||||||
|
@ -771,7 +789,7 @@ public:
|
||||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||||
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
|
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
|
||||||
const Reg64& _rsp = code->rsp;
|
const Reg64& _rsp = code->rsp;
|
||||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
saveNum_ = local::max_(0, allRegNum - noSaveNum);
|
||||||
const int *tbl = getOrderTbl() + noSaveNum;
|
const int *tbl = getOrderTbl() + noSaveNum;
|
||||||
for (int i = 0; i < saveNum_; i++) {
|
for (int i = 0; i < saveNum_; i++) {
|
||||||
code->push(Reg64(tbl[i]));
|
code->push(Reg64(tbl[i]));
|
||||||
|
|
Loading…
Reference in a new issue