externals/xbyak: Update xbyak to 5.77
Merge commit '080b4b3affbdc1d56f2f8230663725413ab03d21' into HEAD
This commit is contained in:
commit
b941cbbcfb
17 changed files with 994 additions and 489 deletions
5
externals/xbyak/gen/avx_type.hpp
vendored
5
externals/xbyak/gen/avx_type.hpp
vendored
|
@ -37,6 +37,7 @@
|
|||
T_B64 = 1 << 27, // m64bcst
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
T_XXX
|
||||
};
|
||||
|
||||
|
@ -161,5 +162,9 @@ std::string type2String(int type)
|
|||
if (!str.empty()) str += " | ";
|
||||
str += "T_VSIB";
|
||||
}
|
||||
if (type & T_MEM_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_MEM_EVEX";
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
|
22
externals/xbyak/gen/gen_code.cpp
vendored
22
externals/xbyak/gen/gen_code.cpp
vendored
|
@ -76,7 +76,7 @@ void putX_X_XM(bool omitOnly)
|
|||
{ 0xC2, "cmpss", T_0F | T_F3, true, true, 2 },
|
||||
{ 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 },
|
||||
{ 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 },
|
||||
{ 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0, true, true, 2 },
|
||||
{ 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, true, true, 2 },
|
||||
{ 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
|
||||
{ 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
|
||||
{ 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
|
||||
|
@ -1491,16 +1491,16 @@ void put()
|
|||
int idx;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX },
|
||||
{ "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX },
|
||||
{ "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX },
|
||||
{ "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
|
||||
{ "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 },
|
||||
{ "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX },
|
||||
{ "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
|
||||
{ "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX },
|
||||
{ "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
|
||||
{ "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 },
|
||||
{ "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||
{ "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||
{ "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||
{ "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
|
||||
{ "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
|
||||
{ "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||
{ "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
|
||||
{ "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||
{ "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
|
||||
{ "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
|
|
327
externals/xbyak/readme.md
vendored
327
externals/xbyak/readme.md
vendored
|
@ -1,107 +1,121 @@
|
|||
|
||||
Xbyak 5.67 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
=============
|
||||
# Xbyak 5.77 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
|
||||
Abstract
|
||||
-------------
|
||||
## Abstract
|
||||
|
||||
This is a header-only library that dynamically assembles x86 (IA32) and x64 (AMD64, x86-64) mnemonics.
|
||||
|
||||
Feature
|
||||
-------------
|
||||
header file only
|
||||
you can use Xbyak's functions at once if xbyak.h is included.
|
||||
## Feature
|
||||
* header file only
|
||||
* Intel/MASM like syntax
|
||||
* fully support AVX-512
|
||||
|
||||
### Supported Instructions Sets
|
||||
**Note**: Xbyak uses and(), or(), xor(), not() functions, so `-fno-operator-names` option is necessary for gcc/clang.
|
||||
|
||||
MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(*partial*)/AVX/AVX2/FMA/VEX-encoded GPR/AVX-512
|
||||
Or define `XBYAK_NO_OP_NAMES` before including `xbyak.h` and use and_(), or_(), xor_(), not_() instead of them.
|
||||
|
||||
and_(), or_(), xor_(), not_() are always available.
|
||||
|
||||
`XBYAK_NO_OP_NAMES` will be defined in a future version.
|
||||
|
||||
### Supported OS
|
||||
|
||||
* Windows Xp, Vista, Windows 7(32bit, 64bit)
|
||||
* Windows XP, Vista, Windows 7, Windows 10(32bit, 64bit)
|
||||
* Linux(32bit, 64bit)
|
||||
* Intel Mac OSX
|
||||
* Intel macOS
|
||||
|
||||
### Supported Compilers
|
||||
|
||||
* Visual Studio C++ VC2012 or later
|
||||
* gcc 4.7 or later
|
||||
* clang 3.3
|
||||
* cygwin gcc 4.5.3
|
||||
* icc 7.2
|
||||
Almost all C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin.
|
||||
|
||||
>Note: Xbyak uses and(), or(), xor(), not() functions, so "-fno-operator-names" option is required on gcc.
|
||||
Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_() instead of them.
|
||||
and_(), or_(), xor_(), not_() are always available.
|
||||
## Install
|
||||
|
||||
Install
|
||||
-------------
|
||||
|
||||
The following files are necessary. Please add the path to your compile directories.
|
||||
The following files are necessary. Please add the path to your compile directory.
|
||||
|
||||
* xbyak.h
|
||||
* xbyak_mnemonic.h
|
||||
* xbyak_util.h
|
||||
|
||||
Linux:
|
||||
|
||||
```
|
||||
make install
|
||||
```
|
||||
|
||||
These files are copied into /usr/local/include/xbyak
|
||||
These files are copied into `/usr/local/include/xbyak`.
|
||||
|
||||
New Feature
|
||||
-------------
|
||||
## How to use it
|
||||
|
||||
Add support for AVX-512 instruction set.
|
||||
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
||||
```
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
Syntax
|
||||
-------------
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code(int x)
|
||||
{
|
||||
mov(eax, x);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
```
|
||||
Make an instance of the class and get the function
|
||||
pointer by calling `getCode()` and call it.
|
||||
```
|
||||
Code c(5);
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("ret=%d\n", f()); // ret = 5
|
||||
```
|
||||
|
||||
Make Xbyak::CodeGenerator and make the class method and get the function
|
||||
pointer by calling cgetCode() and casting the return value.
|
||||
## Syntax
|
||||
Similar to MASM/NASM syntax with parentheses.
|
||||
|
||||
```
|
||||
NASM Xbyak
|
||||
mov eax, ebx --> mov(eax, ebx);
|
||||
inc ecx --> inc(ecx);
|
||||
ret --> ret();
|
||||
```
|
||||
|
||||
### Addressing
|
||||
## Addressing
|
||||
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
||||
otherwise use `ptr`.
|
||||
|
||||
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
```
|
||||
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
[rip + 32bit disp] ; x64 only
|
||||
|
||||
NASM Xbyak
|
||||
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
|
||||
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
||||
test byte [esp], 4 --> test(byte [esp], 4);
|
||||
|
||||
|
||||
How to use Selector(Segment Register)
|
||||
|
||||
>Note: Segment class is not derived from Operand.
|
||||
|
||||
inc qword [rax] --> inc(qword [rax]);
|
||||
```
|
||||
mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]);
|
||||
**Note**: `qword`, `dword`, `word` and `byte` are member variables, so don't use `dword` as the name of an unsigned int type.
|
||||
|
||||
### How to use Selector (Segment Register)
|
||||
```
|
||||
mov eax, [fs:eax] --> putSeg(fs);
|
||||
mov(eax, ptr [eax]);
|
||||
mov ax, cs --> mov(ax, cs);
|
||||
```
|
||||
**Note**: Segment class is not derived from `Operand`.
|
||||
|
||||
>you can use ptr for almost memory access unless you specify the size of memory.
|
||||
|
||||
>dword, word and byte are member variables, then don't use dword as unsigned int, for example.
|
||||
|
||||
### AVX
|
||||
## AVX
|
||||
|
||||
```
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
||||
vgatherdpd(xmm1, ptr [ebp+123+xmm2*4], xmm3);
|
||||
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
||||
```
|
||||
|
||||
*Remark*
|
||||
The omitted destination syntax as the following ss disabled.
|
||||
**Note**:
|
||||
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
||||
But the newer version will not support it.
|
||||
```
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
```
|
||||
define `XBYAK_ENABLE_OMITTED_OPERAND` if you use it for backward compatibility.
|
||||
But the newer version will not support it.
|
||||
|
||||
### AVX-512
|
||||
## AVX-512
|
||||
|
||||
```
|
||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||
|
@ -130,15 +144,18 @@ vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5)
|
|||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
```
|
||||
Remark
|
||||
* k1, ..., k7 are new opmask registers.
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||
* `k4 | k3` is different from `k3 | k4`.
|
||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||
* specify xword/yword/zword(_b) for m128/m256/m512 if necessary.
|
||||
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||
|
||||
### Label
|
||||
## Label
|
||||
Two kinds of Label are supported. (String literal and Label class).
|
||||
|
||||
### String literal
|
||||
```
|
||||
L("L1");
|
||||
jmp("L1");
|
||||
|
||||
|
@ -153,12 +170,14 @@ Remark
|
|||
a lot of mnemonics (32-bit displacement jmp)
|
||||
...
|
||||
L("L3");
|
||||
```
|
||||
|
||||
>Call hasUndefinedLabel() to verify your code has no undefined label.
|
||||
> you can use a label for immediate value of mov like as mov (eax, "L2");
|
||||
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
|
||||
* You can use a label as an immediate value of `mov`, e.g. `mov(eax, "L2")`.
|
||||
|
||||
#### 1. support @@, @f, @b like MASM
|
||||
### Support `@@`, `@f`, `@b` like MASM
|
||||
|
||||
```
|
||||
L("@@"); // <A>
|
||||
jmp("@b"); // jmp to <A>
|
||||
jmp("@f"); // jmp to <B>
|
||||
|
@ -166,37 +185,39 @@ Remark
|
|||
jmp("@b"); // jmp to <B>
|
||||
mov(eax, "@b");
|
||||
jmp(eax); // jmp to <B>
|
||||
```
|
||||
|
||||
#### 2. localization of label by calling inLocalLabel(), outLocallabel().
|
||||
### Local label
|
||||
|
||||
labels begining of period between inLocalLabel() and outLocalLabel()
|
||||
are dealed with local label.
|
||||
inLocalLabel() and outLocalLabel() can be nested.
|
||||
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
|
||||
are treated as a local label.
|
||||
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
||||
|
||||
```
|
||||
void func1()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <A> ; local label
|
||||
...
|
||||
jmp(".lp"); // jmpt to <A>
|
||||
L("aaa"); // global label
|
||||
jmp(".lp"); // jmp to <A>
|
||||
L("aaa"); // global label <C>
|
||||
outLocalLabel();
|
||||
}
|
||||
|
||||
void func2()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <B> ; local label
|
||||
func1();
|
||||
jmp(".lp"); // jmp to <B>
|
||||
outLocalLabel();
|
||||
jmp("aaa"); // jmp to <C>
|
||||
}
|
||||
```
|
||||
|
||||
### Label class
|
||||
|
||||
L() and jxx() functions support a new Label class.
|
||||
`L()` and `jxx()` support Label class.
|
||||
|
||||
Label label1, label2;
|
||||
```
|
||||
Xbyak::Label label1, label2;
|
||||
L(label1);
|
||||
...
|
||||
jmp(label1);
|
||||
|
@ -204,23 +225,41 @@ L() and jxx() functions support a new Label class.
|
|||
jmp(label2);
|
||||
...
|
||||
L(label2);
|
||||
```
|
||||
|
||||
Moreover, assignL(dstLabel, srcLabel) method binds dstLabel with srcLabel.
|
||||
Use `putL` for jmp table
|
||||
```
|
||||
Label labelTbl, L0, L1, L2;
|
||||
mov(rax, labelTbl);
|
||||
// rdx is an index of jump table
|
||||
jmp(ptr [rax + rdx * sizeof(void*)]);
|
||||
L(labelTbl);
|
||||
putL(L0);
|
||||
putL(L1);
|
||||
putL(L2);
|
||||
L(L0);
|
||||
....
|
||||
L(L1);
|
||||
....
|
||||
```
|
||||
|
||||
Label label1, label2;
|
||||
L(label1);
|
||||
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
||||
|
||||
```
|
||||
Label label2;
|
||||
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
|
||||
...
|
||||
jmp(label2);
|
||||
jmp(label2); // label2 is not determined here
|
||||
...
|
||||
assignL(label2, label1); // label2 <= label1
|
||||
assignL(label2, label1); // label2 <- label1
|
||||
```
|
||||
The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
||||
|
||||
The above jmp opecode jumps label1.
|
||||
**Note**:
|
||||
* srcLabel must be used in `L()`.
|
||||
* dstLabel must not be used in `L()`.
|
||||
|
||||
* Restriction:
|
||||
* srcLabel must be used in L().
|
||||
* dstLabel must not be used in L().
|
||||
|
||||
Label::getAddress() returns the address specified by the label instance and 0 if not specified.
|
||||
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
||||
```
|
||||
// not AutoGrow mode
|
||||
Label label;
|
||||
|
@ -229,7 +268,7 @@ L(label);
|
|||
assert(label.getAddress() == getCurr());
|
||||
```
|
||||
|
||||
### Rip
|
||||
### Rip ; relative addressing
|
||||
```
|
||||
Label label;
|
||||
mov(eax, ptr [rip + label]); // eax = 4
|
||||
|
@ -243,9 +282,12 @@ int x;
|
|||
...
|
||||
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
||||
```
|
||||
### Code size
|
||||
The default max code size is 4096 bytes. Please set it in constructor of CodeGenerator() if you want to use large size.
|
||||
|
||||
## Code size
|
||||
The default max code size is 4096 bytes.
|
||||
Specify the size in constructor of `CodeGenerator()` if necessary.
|
||||
|
||||
```
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Quantize()
|
||||
|
@ -254,39 +296,40 @@ The default max code size is 4096 bytes. Please set it in constructor of CodeGen
|
|||
}
|
||||
...
|
||||
};
|
||||
```
|
||||
|
||||
### use user allocated memory
|
||||
## User allocated memory
|
||||
|
||||
You can make jit code on prepared memory.
|
||||
|
||||
class Sample : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Sample(void *userPtr, size_t size)
|
||||
: Xbyak::CodeGenerator(size, userPtr)
|
||||
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
|
||||
|
||||
```
|
||||
alignas(4096) uint8_t buf[8192]; // C++11 or later
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||
{
|
||||
...
|
||||
mov(rax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
const size_t codeSize = 1024;
|
||||
uint8 buf[codeSize + 16];
|
||||
int main()
|
||||
{
|
||||
Code c;
|
||||
c.setProtectModeRE(); // set memory to Read/Exec
|
||||
printf("%d\n", c.getCode<int(*)()>()());
|
||||
}
|
||||
```
|
||||
|
||||
// get 16-byte aligned address
|
||||
uint8 *p = Xbyak::CodeArray::getAlignedAddress(buf);
|
||||
**Note**: See [sample/test0.cpp](sample/test0.cpp).
|
||||
|
||||
// append executable attribute to the memory
|
||||
Xbyak::CodeArray::protect(p, codeSize, true);
|
||||
### AutoGrow
|
||||
|
||||
// construct your jit code on the memory
|
||||
Sample s(p, codeSize);
|
||||
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
||||
|
||||
>See *sample/test0.cpp*
|
||||
|
||||
AutoGrow
|
||||
-------------
|
||||
|
||||
Under `AutoGrow` mode, Xbyak extends memory automatically if necessary.
|
||||
Call ready() before calling getCode() to calc address of jmp.
|
||||
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
|
@ -296,39 +339,70 @@ Call ready() before calling getCode() to calc address of jmp.
|
|||
}
|
||||
};
|
||||
Code c;
|
||||
c.ready(); // Don't forget to call this function
|
||||
// generate code for jit
|
||||
c.ready(); // mode = Read/Write/Exec
|
||||
```
|
||||
>Don't use the address returned by getCurr() before calling ready().
|
||||
>It may be invalid address.
|
||||
>RESTRICTION : rip addressing is not supported in AutoGrow
|
||||
|
||||
Macro
|
||||
-------------
|
||||
**Note**:
|
||||
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be an invalid address.
|
||||
|
||||
### Read/Exec mode
|
||||
Xbyak sets the memory to Read/Write/Exec mode in order to run jit code.
|
||||
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
||||
call `setProtectModeRE()` after generating jit code.
|
||||
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.setProtectModeRE();
|
||||
...
|
||||
|
||||
```
|
||||
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
||||
See [protect-re.cpp](sample/protect-re.cpp).
|
||||
|
||||
## Macro
|
||||
|
||||
* **XBYAK32** is defined on 32bit.
|
||||
* **XBYAK64** is defined on 64bit.
|
||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
|
||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
|
||||
* define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names`
|
||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(duplicated in the future)
|
||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future)
|
||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
|
||||
|
||||
Sample
|
||||
-------------
|
||||
## Sample
|
||||
|
||||
* test0.cpp ; tiny sample of Xbyak(x86, x64)
|
||||
* quantize.cpp ; JIT optimized quantization by fast division(x86 only)
|
||||
* calc.cpp ; assemble and estimate a given polynomial(x86, x64)
|
||||
* bf.cpp ; JIT brainfuck(x86, x64)
|
||||
* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64)
|
||||
* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
|
||||
* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
|
||||
* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64)
|
||||
|
||||
License
|
||||
-------------
|
||||
## License
|
||||
|
||||
modified new BSD License
|
||||
http://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
History
|
||||
-------------
|
||||
## History
|
||||
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
|
||||
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
||||
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
||||
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
|
||||
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
||||
* 2018/Oct/15 util::StackFrame uses push/pop instead of mov
|
||||
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
||||
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
||||
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
|
||||
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
|
||||
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
||||
* 2018/Jul/26 ver 5.661 support mingw64
|
||||
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
||||
|
@ -392,8 +466,7 @@ History
|
|||
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
||||
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
||||
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
||||
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm).
|
||||
support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
||||
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
||||
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
||||
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
||||
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
||||
|
@ -453,8 +526,6 @@ History
|
|||
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
||||
* 2007/Jan/4 first version
|
||||
|
||||
Author
|
||||
-------------
|
||||
|
||||
## Author
|
||||
MITSUNARI Shigeo(herumi@nifty.com)
|
||||
|
||||
|
|
52
externals/xbyak/readme.txt
vendored
52
externals/xbyak/readme.txt
vendored
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.67
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.77
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -245,8 +245,8 @@ void func2()
|
|||
|
||||
更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。
|
||||
|
||||
Label label1, label2;
|
||||
L(label1);
|
||||
Label label2;
|
||||
Label label1 = L(); // Label label1; L(label1);と同じ意味
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
|
@ -309,6 +309,41 @@ bool CodeArray::protect(const void *addr, size_t size, bool canExec);
|
|||
*/
|
||||
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
|
||||
|
||||
・read/execモード
|
||||
デフォルトのCodeGeneratorはコンストラクト時にJIT用の領域をread/write/execモードに設定して利用します。
|
||||
コード生成時はread/writeでコード実行時にはread/execにしたい場合、次のようにしてください。
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE) // JIT領域をread/writeのままコード生成
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.setProtectModeRE(); // read/execモードに変更
|
||||
// JIT領域を実行
|
||||
|
||||
AutoGrowの場合はready()の代わりにreadyRE()を呼んでください。
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeのままコード生成
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.readyRE(); // read/execモードに変更
|
||||
// JIT領域を実行
|
||||
|
||||
setProtectModeRW()を呼ぶと領域が元のread/writeモードに戻ります。
|
||||
|
||||
|
||||
その他詳細は各種サンプルを参照してください。
|
||||
-----------------------------------------------------------------------------
|
||||
◎マクロ
|
||||
|
@ -338,6 +373,17 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2019/03/06 ver 5.77 LLCキャッシュを共有するCPU数の修正(by densamoilov)
|
||||
2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)
|
||||
2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元
|
||||
2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す
|
||||
2018/10/21 ver 5.74 RegRip +/- intの形をサポート Xbyak::CastToを削除
|
||||
2018/10/15 util::StackFrameでmovの代わりにpush/popを使う
|
||||
2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修正
|
||||
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
2018/09/04 ver 5.71 新しいlabelインスタンスを返すL()を追加
|
||||
2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加
|
||||
2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday)
|
||||
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
|
||||
2018/07/26 ver 5.661 mingw64対応
|
||||
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加
|
||||
|
|
28
externals/xbyak/sample/bf.cpp
vendored
28
externals/xbyak/sample/bf.cpp
vendored
|
@ -10,12 +10,6 @@
|
|||
#endif
|
||||
|
||||
class Brainfuck : public Xbyak::CodeGenerator {
|
||||
private:
|
||||
enum Direction { B, F };
|
||||
std::string toStr(int labelNo, Direction dir)
|
||||
{
|
||||
return Xbyak::Label::toStr(labelNo) + (dir == B ? 'B' : 'F');
|
||||
}
|
||||
public:
|
||||
int getContinuousChar(std::istream& is, char c)
|
||||
{
|
||||
|
@ -67,8 +61,7 @@ public:
|
|||
mov(pGetchar, rsi); // getchar
|
||||
mov(stack, rdx); // stack
|
||||
#endif
|
||||
int labelNo = 0;
|
||||
std::stack<int> keepLabelNo;
|
||||
std::stack<Label> labelF, labelB;
|
||||
char c;
|
||||
while (is >> c) {
|
||||
switch (c) {
|
||||
|
@ -116,17 +109,22 @@ public:
|
|||
mov(cur, eax);
|
||||
break;
|
||||
case '[':
|
||||
L(toStr(labelNo, B));
|
||||
{
|
||||
Label B = L();
|
||||
labelB.push(B);
|
||||
mov(eax, cur);
|
||||
test(eax, eax);
|
||||
jz(toStr(labelNo, F), T_NEAR);
|
||||
keepLabelNo.push(labelNo++);
|
||||
Label F;
|
||||
jz(F, T_NEAR);
|
||||
labelF.push(F);
|
||||
}
|
||||
break;
|
||||
case ']':
|
||||
{
|
||||
int no = keepLabelNo.top(); keepLabelNo.pop();
|
||||
jmp(toStr(no, B));
|
||||
L(toStr(no, F));
|
||||
Label B = labelB.top(); labelB.pop();
|
||||
jmp(B);
|
||||
Label F = labelF.top(); labelF.pop();
|
||||
L(F);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -200,7 +198,7 @@ int main(int argc, char *argv[])
|
|||
Brainfuck bf(ifs);
|
||||
if (mode == 0) {
|
||||
static int stack[128 * 1024];
|
||||
bf.getCode<void (*)(void*, void*, int *)>()(Xbyak::CastTo<void*>(putchar), Xbyak::CastTo<void*>(getchar), stack);
|
||||
bf.getCode<void (*)(const void*, const void*, int *)>()(reinterpret_cast<const void*>(putchar), reinterpret_cast<const void*>(getchar), stack);
|
||||
} else {
|
||||
dump(bf.getCode(), bf.getSize());
|
||||
}
|
||||
|
|
70
externals/xbyak/sample/protect-re.cpp
vendored
Normal file
70
externals/xbyak/sample/protect-re.cpp
vendored
Normal file
|
@ -0,0 +1,70 @@
|
|||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
// Sample generator with a 4096-byte buffer constructed with
// Xbyak::DontSetProtectRWE, so the caller switches the protection mode
// explicitly (see test1) instead of relying on the default setup.
struct Code1 : Xbyak::CodeGenerator {
|
||||
// Emits `mov eax, 123; ret`, i.e. a function returning 123 in eax.
Code1()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
// Appends one zero byte to the code buffer; used by the test to
// attempt a write into the buffer after the protection mode changed.
void update()
|
||||
{
|
||||
db(0);
|
||||
}
|
||||
};
|
||||
|
||||
// Exercises explicit protection switching on a fixed-size generator.
// updateCode: when true, writes into the buffer while it is in the mode
//             set by setProtectModeRE(); the original comment marks this
//             as the line expected to crash.
void test1(bool updateCode)
|
||||
{
|
||||
Code1 c;
|
||||
// Switch the generated code to the "RE" protection mode before running it.
c.setProtectModeRE();
|
||||
if (updateCode) c.update(); // segmentation fault
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("f=%d\n", f());
|
||||
|
||||
// Back in the "RW" mode the same write is expected to succeed.
c.setProtectModeRW();
|
||||
c.update();
|
||||
puts("ok");
|
||||
}
|
||||
|
||||
// Same sample as Code1 but constructed with Xbyak::AutoGrow, so the
// caller finalizes the code with readyRE() (see test2).
struct Code2 : Xbyak::CodeGenerator {
|
||||
// Emits `mov eax, 123; ret`, i.e. a function returning 123 in eax.
Code2()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
// Appends one zero byte to the code buffer; used by the test to
// attempt a write into the buffer after the protection mode changed.
void update()
|
||||
{
|
||||
db(0);
|
||||
}
|
||||
};
|
||||
|
||||
// AutoGrow counterpart of test1: readyRE() is called instead of
// setProtectModeRE() before fetching the function pointer.
// updateCode: when true, writes into the buffer right after readyRE();
//             the original comment marks this as the line expected to crash.
void test2(bool updateCode)
|
||||
{
|
||||
Code2 c;
|
||||
c.readyRE();
|
||||
if (updateCode) c.update(); // segmentation fault
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("f=%d\n", f());
|
||||
|
||||
// Back in the "RW" mode the same write is expected to succeed.
c.setProtectModeRW();
|
||||
c.update();
|
||||
puts("ok");
|
||||
}
|
||||
|
||||
// Usage: <prog> <testNum> [update]
//   testNum selects test1 (1) or test2 (2); any third argument enables
//   the write-while-protected path in the selected test.
int main(int argc, char *argv[])
|
||||
{
|
||||
// Without a test number, print usage and exit with status 0.
if (argc < 2) {
|
||||
fprintf(stderr, "%s <testNum> [update]\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
// Only the presence of a third argument matters, not its value.
bool update = argc == 3;
|
||||
int n = atoi(argv[1]);
|
||||
printf("n=%d update=%d\n", n, update);
|
||||
switch (n) {
|
||||
case 1: test1(update); break;
|
||||
case 2: test2(update); break;
|
||||
default: fprintf(stderr, "no test %d\n", n); break;
|
||||
}
|
||||
}
|
2
externals/xbyak/sample/static_buf.cpp
vendored
2
externals/xbyak/sample/static_buf.cpp
vendored
|
@ -32,7 +32,7 @@ struct Code : Xbyak::CodeGenerator {
|
|||
|
||||
inline int add(int a, int b)
|
||||
{
|
||||
return Xbyak::CastTo<int (*)(int,int)>(buf)(a, b);
|
||||
return reinterpret_cast<int (*)(int, int)>(buf)(a, b);
|
||||
}
|
||||
|
||||
int main()
|
||||
|
|
9
externals/xbyak/sample/test0.cpp
vendored
9
externals/xbyak/sample/test0.cpp
vendored
|
@ -77,7 +77,7 @@ public:
|
|||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||
call(atoi);
|
||||
#else
|
||||
call(Xbyak::CastTo<void*>(atoi));
|
||||
call(reinterpret_cast<const void*>(atoi));
|
||||
#endif
|
||||
add(esp, 4);
|
||||
#endif
|
||||
|
@ -96,7 +96,7 @@ public:
|
|||
mov(rax, (size_t)atoi);
|
||||
jmp(rax);
|
||||
#else
|
||||
jmp(Xbyak::CastTo<void*>(atoi));
|
||||
jmp(reinterpret_cast<const void*>(atoi));
|
||||
#endif
|
||||
}
|
||||
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
|
||||
|
@ -171,8 +171,9 @@ int main()
|
|||
return 1;
|
||||
}
|
||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||
if (Xbyak::CastTo<uint8*>(func) != p) {
|
||||
fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo<uint8*>(func));
|
||||
const uint8 *funcp = reinterpret_cast<const uint8*>(func);
|
||||
if (funcp != p) {
|
||||
fprintf(stderr, "internal error %p %p\n", p, funcp);
|
||||
return 1;
|
||||
}
|
||||
printf("0 + ... + %d = %d\n", 100, func(100));
|
||||
|
|
3
externals/xbyak/sample/test_util.cpp
vendored
3
externals/xbyak/sample/test_util.cpp
vendored
|
@ -104,9 +104,12 @@ void putCPUinfo()
|
|||
Core i7-3930K 6 2D
|
||||
*/
|
||||
cpu.putFamily();
|
||||
if (!cpu.has(Cpu::tINTEL)) return;
|
||||
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
||||
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
||||
}
|
||||
printf("SmtLevel =%u\n", cpu.getNumCores(Xbyak::util::SmtLevel));
|
||||
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
|
||||
}
|
||||
|
||||
int main()
|
||||
|
|
2
externals/xbyak/sample/toyvm.cpp
vendored
2
externals/xbyak/sample/toyvm.cpp
vendored
|
@ -204,7 +204,7 @@ public:
|
|||
push(reg[r]);
|
||||
push('A' + r);
|
||||
push((int)str);
|
||||
call(Xbyak::CastTo<void*>(printf));
|
||||
call(reinterpret_cast<const void*>(printf));
|
||||
add(esp, 4 * 4);
|
||||
pop(ecx);
|
||||
pop(edx);
|
||||
|
|
123
externals/xbyak/test/jmp.cpp
vendored
123
externals/xbyak/test/jmp.cpp
vendored
|
@ -889,6 +889,34 @@ CYBOZU_TEST_AUTO(testNewLabel)
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that a Label obtained from the return value of L() can be used
// as a jump target, mixed with labels bound later via L(label).
// Expected flow: eax=0 -> inc (1) -> call L2: inc (2), ret -> jmp L1 ->
// eax != 0 so jnz exit -> inc (3) -> ret, hence the result 3.
CYBOZU_TEST_AUTO(returnLabel)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
xor_(eax, eax);
|
||||
// L1 is created and bound at the current position in one step.
Label L1 = L();
|
||||
test(eax, eax);
|
||||
Label exit;
|
||||
// Not taken on the first pass (eax == 0), taken on the second.
jnz(exit);
|
||||
inc(eax); // 1
|
||||
Label L2;
|
||||
call(L2);
|
||||
jmp(L1);
|
||||
L(L2);
|
||||
inc(eax); // 2
|
||||
ret();
|
||||
L(exit);
|
||||
inc(eax); // 3
|
||||
ret();
|
||||
}
|
||||
};
|
||||
Code code;
|
||||
int (*f)() = code.getCode<int (*)()>();
|
||||
int r = f();
|
||||
CYBOZU_TEST_EQUAL(r, 3);
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(testAssign)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
|
@ -987,6 +1015,52 @@ struct GetAddressCode1 : Xbyak::CodeGenerator {
|
|||
}
|
||||
};
|
||||
|
||||
// Generates a function that dispatches through a jump table built with
// putL(): the table holds the addresses of L0, L1 and L2, and each
// target returns a distinct constant (ret0, ret1, ret2).
struct CodeLabelTable : Xbyak::CodeGenerator {
|
||||
// Distinct return values so the test can tell which entry was taken.
enum { ret0 = 3 };
|
||||
enum { ret1 = 5 };
|
||||
enum { ret2 = 8 };
|
||||
CodeLabelTable()
|
||||
{
|
||||
using namespace Xbyak;
|
||||
// p0: register used as the table index; a: register used both for the
// table address and for the return value. On the 32-bit path the index
// is first loaded from [esp + 4] into edx.
#ifdef XBYAK64_WIN
|
||||
const Reg64& p0 = rcx;
|
||||
const Reg64& a = rax;
|
||||
#elif defined (XBYAK64_GCC)
|
||||
const Reg64& p0 = rdi;
|
||||
const Reg64& a = rax;
|
||||
#else
|
||||
const Reg32& p0 = edx;
|
||||
const Reg32& a = eax;
|
||||
mov(edx, ptr [esp + 4]);
|
||||
#endif
|
||||
Label labelTbl, L0, L1, L2;
|
||||
// Load the table address, then jump indirectly through entry p0.
mov(a, labelTbl);
|
||||
jmp(ptr [a + p0 * sizeof(void*)]);
|
||||
// The table itself: one pointer-sized label address per entry.
L(labelTbl);
|
||||
putL(L0);
|
||||
putL(L1);
|
||||
putL(L2);
|
||||
L(L0);
|
||||
mov(a, ret0);
|
||||
ret();
|
||||
L(L1);
|
||||
mov(a, ret1);
|
||||
ret();
|
||||
L(L2);
|
||||
mov(a, ret2);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the CodeLabelTable-generated function for each valid index and checks
// that it returns the constant associated with that table slot.
CYBOZU_TEST_AUTO(LabelTable)
|
||||
{
|
||||
CodeLabelTable c;
|
||||
int (*f)(int) = c.getCode<int (*)(int)>();
|
||||
CYBOZU_TEST_EQUAL(f(0), c.ret0);
|
||||
CYBOZU_TEST_EQUAL(f(1), c.ret1);
|
||||
CYBOZU_TEST_EQUAL(f(2), c.ret2);
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(getAddress1)
|
||||
{
|
||||
GetAddressCode1 c;
|
||||
|
@ -1143,11 +1217,56 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
|
|||
ret();
|
||||
}
|
||||
} code;
|
||||
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RE);
|
||||
code.setProtectModeRE();
|
||||
code.getCode<void (*)()>()();
|
||||
CYBOZU_TEST_EQUAL(*x0, 123);
|
||||
CYBOZU_TEST_EQUAL(*x1, 456);
|
||||
CYBOZU_TEST_EQUAL(buf[8], 99);
|
||||
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RW);
|
||||
code.setProtectModeRW();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Code generator used by release_label_after_code. It takes three
// caller-owned labels and leaves them in different states:
//   L1 - defined and jumped to,
//   L2 - defined,
//   L3 - only referenced by a jump, never defined.
// The generated code is never executed; only the label bookkeeping matters.
struct ReleaseTestCode : Xbyak::CodeGenerator {
|
||||
ReleaseTestCode(Label& L1, Label& L2, Label& L3)
|
||||
{
|
||||
L(L1);
|
||||
jmp(L1);
|
||||
L(L2);
|
||||
jmp(L3); // not assigned
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
code must unlink label if code is destroyed
|
||||
*/
|
||||
// Labels L1..L5 outlive the ReleaseTestCode object that used them.
// While the generator is alive: L1 and L2 have a positive id and a non-zero
// address; L3 has an id but address 0 because it was never defined.
// L4 (via assignL) and L5 (via copy assignment) are tied to L1.
// After the generator goes out of scope, the id and address of L1, L2, L4
// and L5 must read back as 0; for L3 only the address is checked.
CYBOZU_TEST_AUTO(release_label_after_code)
|
||||
{
|
||||
puts("---");
|
||||
{
|
||||
Label L1, L2, L3, L4, L5;
|
||||
{
|
||||
// inner scope: the generator is destroyed at the closing brace
ReleaseTestCode code(L1, L2, L3);
|
||||
CYBOZU_TEST_ASSERT(L1.getId() > 0);
|
||||
CYBOZU_TEST_ASSERT(L1.getAddress() != 0);
|
||||
CYBOZU_TEST_ASSERT(L2.getId() > 0);
|
||||
CYBOZU_TEST_ASSERT(L2.getAddress() != 0);
|
||||
CYBOZU_TEST_ASSERT(L3.getId() > 0);
|
||||
CYBOZU_TEST_ASSERT(L3.getAddress() == 0); // L3 is not assigned
|
||||
code.assignL(L4, L1);
|
||||
L5 = L1;
|
||||
printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId());
|
||||
}
|
||||
puts("code is released");
|
||||
// from here on the labels refer to a generator that no longer exists
CYBOZU_TEST_ASSERT(L1.getId() == 0);
|
||||
CYBOZU_TEST_ASSERT(L1.getAddress() == 0);
|
||||
CYBOZU_TEST_ASSERT(L2.getId() == 0);
|
||||
CYBOZU_TEST_ASSERT(L2.getAddress() == 0);
|
||||
// CYBOZU_TEST_ASSERT(L3.getId() == 0); // L3 is not assigned so not cleared
|
||||
CYBOZU_TEST_ASSERT(L3.getAddress() == 0);
|
||||
CYBOZU_TEST_ASSERT(L4.getId() == 0);
|
||||
CYBOZU_TEST_ASSERT(L4.getAddress() == 0);
|
||||
CYBOZU_TEST_ASSERT(L5.getId() == 0);
|
||||
CYBOZU_TEST_ASSERT(L5.getAddress() == 0);
|
||||
printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId());
|
||||
}
|
||||
}
|
||||
|
|
245
externals/xbyak/test/make_512.cpp
vendored
245
externals/xbyak/test/make_512.cpp
vendored
|
@ -73,7 +73,6 @@ const uint64 YMM_ER = 1ULL << 36;
|
|||
const uint64 VM32Y_K = 1ULL << 37;
|
||||
const uint64 IMM_2 = 1ULL << 38;
|
||||
const uint64 IMM = IMM_1 | IMM_2;
|
||||
const uint64 XMM = _XMM | _XMM2;
|
||||
const uint64 YMM = _YMM | _YMM2;
|
||||
const uint64 K = 1ULL << 43;
|
||||
const uint64 _ZMM = 1ULL << 44;
|
||||
|
@ -90,7 +89,10 @@ const uint64 ZMM_SAE = 1ULL << 48;
|
|||
const uint64 ZMM_ER = 1ULL << 49;
|
||||
#ifdef XBYAK64
|
||||
const uint64 _XMM3 = 1ULL << 50;
|
||||
#else
|
||||
const uint64 _XMM3 = 0;
|
||||
#endif
|
||||
const uint64 XMM = _XMM | _XMM2 | _XMM3;
|
||||
const uint64 XMM_SAE = 1ULL << 51;
|
||||
#ifdef XBYAK64
|
||||
const uint64 XMM_KZ = 1ULL << 52;
|
||||
|
@ -352,7 +354,8 @@ class Test {
|
|||
case VM32Y_K:
|
||||
return isXbyak_ ? "ptr [64+ymm13*2+r13] | k6" : "[64+ymm13*2+r13]{k6}";
|
||||
case VM32Z_K:
|
||||
return isXbyak_ ? "ptr [64+zmm13*2+r13] | k6" : "[64+zmm13*2+r13]{k6}";
|
||||
if (idx & 1) return isXbyak_ ? "ptr [64+zmm10*8+r9] | k6" : "[64+zmm10*8+r9]{k6}";
|
||||
return isXbyak_ ? "ptr [64+zmm30*2+r13] | k6" : "[64+zmm30*2+r13]{k6}";
|
||||
case VM32Z:
|
||||
return isXbyak_ ? "ptr [64+zmm13*2+rcx]" : "[64+zmm13*2+rcx]";
|
||||
case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
|
||||
|
@ -607,7 +610,7 @@ public:
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
put(p->name, K, _XMM, _XMM | MEM, IMM8);
|
||||
put(p->name, K, XMM, _XMM | MEM, IMM8);
|
||||
if (!p->supportYMM) continue;
|
||||
put(p->name, K, _YMM, _YMM | MEM, IMM8);
|
||||
put(p->name, K, _ZMM, _ZMM | MEM, IMM8);
|
||||
|
@ -626,10 +629,10 @@ public:
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM);
|
||||
put(p->name, XMM, XMM_SAE | XMM | MEM);
|
||||
}
|
||||
}
|
||||
put("vcomiss", _XMM3, XMM | MEM);
|
||||
put("vcomiss", XMM, _XMM3 | MEM);
|
||||
put("vcomiss", XMM, XMM_SAE);
|
||||
#endif
|
||||
}
|
||||
|
@ -673,10 +676,10 @@ public:
|
|||
"vpbroadcastq",
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
|
||||
put(tbl[i], XMM_KZ | ZMM_KZ, XMM | _MEM);
|
||||
}
|
||||
}
|
||||
put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
|
||||
put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, XMM | _MEM);
|
||||
put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
|
||||
put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
|
||||
put("vbroadcasti32x8", ZMM_KZ, _MEM);
|
||||
|
@ -684,14 +687,14 @@ public:
|
|||
}
|
||||
void putMisc1()
|
||||
{
|
||||
put("vmaskmovps", XMM, XMM, MEM);
|
||||
put("vmaskmovps", _XMM, _XMM, MEM);
|
||||
put("vmaskmovps", YMM, YMM, MEM);
|
||||
|
||||
put("vmaskmovpd", YMM, YMM, MEM);
|
||||
put("vmaskmovpd", XMM, XMM, MEM);
|
||||
put("vmaskmovpd", _XMM, _XMM, MEM);
|
||||
|
||||
put("vmaskmovps", MEM, XMM, XMM);
|
||||
put("vmaskmovpd", MEM, XMM, XMM);
|
||||
put("vmaskmovps", MEM, _XMM, _XMM);
|
||||
put("vmaskmovpd", MEM, _XMM, _XMM);
|
||||
|
||||
put("vbroadcastf128", YMM, MEM);
|
||||
put("vbroadcasti128", YMM, MEM);
|
||||
|
@ -710,8 +713,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
put("vinsertf128", YMM, YMM, XMM | MEM, IMM8);
|
||||
put("vinserti128", YMM, YMM, XMM | MEM, IMM8);
|
||||
put("vinsertf128", YMM, YMM, _XMM | _XMM2 | MEM, IMM8);
|
||||
put("vinserti128", YMM, YMM, _XMM | _XMM2 | MEM, IMM8);
|
||||
put("vperm2f128", YMM, YMM, YMM | MEM, IMM8);
|
||||
put("vperm2i128", YMM, YMM, YMM | MEM, IMM8);
|
||||
|
||||
|
@ -721,9 +724,9 @@ public:
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const char *name = tbl[i];
|
||||
put(name, XMM, XMM, MEM);
|
||||
put(name, _XMM, _XMM, MEM);
|
||||
put(name, YMM, YMM, MEM);
|
||||
put(name, MEM, XMM, XMM);
|
||||
put(name, MEM, _XMM, _XMM);
|
||||
put(name, MEM, YMM, YMM);
|
||||
}
|
||||
}
|
||||
|
@ -760,29 +763,29 @@ public:
|
|||
put(name, MEM, ZMM);
|
||||
put(name, ZMM, MEM);
|
||||
#ifdef XBYAK64
|
||||
put(name, MEM, _XMM3);
|
||||
put(name, _XMM3, MEM);
|
||||
put(name, MEM, XMM);
|
||||
put(name, XMM, MEM);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
void put_vmov()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
put("vmovd", _XMM3, MEM|REG32);
|
||||
put("vmovd", MEM|REG32, _XMM3);
|
||||
put("vmovq", _XMM3, MEM|REG64|XMM);
|
||||
put("vmovq", MEM|REG64|XMM, _XMM3);
|
||||
put("vmovhlps", _XMM3, _XMM3, _XMM3);
|
||||
put("vmovlhps", _XMM3, _XMM3, _XMM3);
|
||||
put("vmovntdqa", _XMM3|_YMM3|ZMM, MEM);
|
||||
put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM);
|
||||
put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM);
|
||||
put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM);
|
||||
put("vmovd", XMM, MEM|REG32);
|
||||
put("vmovd", MEM|REG32, XMM);
|
||||
put("vmovq", XMM, MEM|REG64|XMM);
|
||||
put("vmovq", MEM|REG64|XMM, XMM);
|
||||
put("vmovhlps", XMM, _XMM3, _XMM3);
|
||||
put("vmovlhps", XMM, _XMM3, _XMM3);
|
||||
put("vmovntdqa", XMM|_YMM3|ZMM, MEM);
|
||||
put("vmovntdq", MEM, XMM | _YMM3 | ZMM);
|
||||
put("vmovntpd", MEM, XMM | _YMM3 | ZMM);
|
||||
put("vmovntps", MEM, XMM | _YMM3 | ZMM);
|
||||
|
||||
put("vmovsd", XMM_KZ, _XMM3, _XMM3);
|
||||
put("vmovsd", XMM_KZ, XMM, _XMM3);
|
||||
put("vmovsd", XMM_KZ, MEM);
|
||||
put("vmovsd", MEM_K, XMM);
|
||||
put("vmovss", XMM_KZ, _XMM3, _XMM3);
|
||||
put("vmovss", XMM_KZ, XMM, _XMM3);
|
||||
put("vmovss", XMM_KZ, MEM);
|
||||
put("vmovss", MEM_K, XMM);
|
||||
|
||||
|
@ -797,7 +800,7 @@ public:
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const char *name = tbl[i];
|
||||
put(name, XMM_KZ, _XMM, _XMM | MEM, IMM);
|
||||
put(name, XMM_KZ, XMM, _XMM | MEM, IMM);
|
||||
put(name, _YMM3, _YMM3, _YMM3 | _MEM, IMM);
|
||||
put(name, _ZMM, _ZMM, _ZMM | _MEM, IMM);
|
||||
}
|
||||
|
@ -810,7 +813,7 @@ public:
|
|||
"vmovlps",
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
put(tbl[i], _XMM3, _XMM3, MEM);
|
||||
put(tbl[i], XMM, _XMM3, MEM);
|
||||
put(tbl[i], MEM, _XMM3);
|
||||
}
|
||||
}
|
||||
|
@ -836,11 +839,11 @@ public:
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
put(p.name, _XMM|XMM_KZ, _XMM|MEM);
|
||||
put(p.name, XMM|XMM_KZ, _XMM|MEM);
|
||||
put(p.name, _YMM|YMM_KZ, _YMM|MEM);
|
||||
put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
|
||||
if (!p.M_X) continue;
|
||||
put(p.name, MEM|MEM_K, _XMM);
|
||||
put(p.name, MEM|MEM_K, XMM);
|
||||
put(p.name, MEM|MEM_K, _YMM);
|
||||
put(p.name, MEM|MEM_K, _ZMM);
|
||||
}
|
||||
|
@ -857,7 +860,7 @@ public:
|
|||
put("vpabsd", ZMM_KZ, M_1to16 | _MEM);
|
||||
put("vpabsq", ZMM_KZ, M_1to8 | _MEM);
|
||||
|
||||
put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM);
|
||||
put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, XMM | _MEM);
|
||||
put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM);
|
||||
|
||||
put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM);
|
||||
|
@ -879,7 +882,7 @@ public:
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
|
||||
put(p.name, XMM_KZ, XMM, _XMM|p.mem);
|
||||
}
|
||||
}
|
||||
void put512_X3()
|
||||
|
@ -891,54 +894,54 @@ public:
|
|||
uint64_t x2;
|
||||
uint64_t xm;
|
||||
} tbl[] = {
|
||||
{ "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpacksswb", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
|
||||
{ "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||
|
||||
{ "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpackssdw", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
|
||||
{ "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
||||
|
||||
{ "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpackusdw", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
|
||||
{ "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
||||
|
||||
{ "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpackuswb", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
|
||||
{ "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||
|
||||
{ "vpaddb", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpaddb", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpaddw", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpaddq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpaddsb", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||
|
||||
{ "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpaddsw", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||
|
||||
{ "vpaddusb", XMM_KZ, _XMM, _XMM | MEM },
|
||||
{ "vpaddusb", XMM_KZ, XMM, _XMM | MEM },
|
||||
{ "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM },
|
||||
|
||||
{ "vpaddusw", XMM_KZ, _XMM, _XMM | MEM },
|
||||
{ "vpaddusw", XMM_KZ, XMM, _XMM | MEM },
|
||||
{ "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM },
|
||||
|
||||
{ "vpsubb", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpsubw", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpsubb", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpsubw", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpsubd", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpsubq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpsubsb", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||
|
||||
{ "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpsubsw", XMM_KZ, XMM, _XMM | _MEM },
|
||||
{ "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||
|
||||
{ "vpsubusb", XMM_KZ, _XMM, _XMM | MEM },
|
||||
{ "vpsubusb", XMM_KZ, XMM, _XMM | MEM },
|
||||
{ "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM },
|
||||
|
||||
{ "vpsubusw", XMM_KZ, _XMM, _XMM | MEM },
|
||||
{ "vpsubusw", XMM_KZ, XMM, _XMM | MEM },
|
||||
{ "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM },
|
||||
|
||||
{ "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
||||
|
@ -983,137 +986,137 @@ public:
|
|||
{ "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
|
||||
{ "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
|
||||
|
||||
{ "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 },
|
||||
{ "vpslldq", XMM, _XMM3 | _MEM, IMM8 },
|
||||
{ "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 },
|
||||
{ "vpslldq", _ZMM, _ZMM | _MEM, IMM8 },
|
||||
|
||||
{ "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 },
|
||||
{ "vpsrldq", XMM, _XMM3 | _MEM, IMM8 },
|
||||
{ "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 },
|
||||
{ "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 },
|
||||
|
||||
{ "vpsraw", XMM_KZ, _XMM | _MEM, IMM8 },
|
||||
{ "vpsraw", XMM_KZ, XMM | _MEM, IMM8 },
|
||||
{ "vpsraw", ZMM_KZ, _ZMM | _MEM, IMM8 },
|
||||
|
||||
{ "vpsrad", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
|
||||
{ "vpsrad", XMM_KZ, XMM | M_1to4 | _MEM, IMM8 },
|
||||
{ "vpsrad", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
||||
|
||||
{ "vpsraq", XMM, XMM, IMM8 },
|
||||
{ "vpsraq", XMM_KZ, _XMM | M_1to2 | _MEM, IMM8 },
|
||||
{ "vpsraq", XMM_KZ, XMM | M_1to2 | _MEM, IMM8 },
|
||||
{ "vpsraq", ZMM_KZ, _ZMM | M_1to8 | _MEM, IMM8 },
|
||||
|
||||
{ "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 },
|
||||
{ "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 },
|
||||
{ "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
|
||||
{ "vpsllw", XMM, _XMM3 | _MEM, IMM8 },
|
||||
{ "vpslld", XMM, _XMM3 | _MEM | M_1to4, IMM8 },
|
||||
{ "vpsllq", XMM, _XMM3 | _MEM | M_1to2, IMM8 },
|
||||
|
||||
{ "vpsrlw", XMM_KZ, _XMM | _MEM, IMM8 },
|
||||
{ "vpsrlw", XMM_KZ, XMM | _MEM, IMM8 },
|
||||
{ "vpsrlw", ZMM_KZ, _ZMM | _MEM, IMM8 },
|
||||
|
||||
{ "vpsrld", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
|
||||
{ "vpsrld", XMM_KZ, XMM | M_1to4 | _MEM, IMM8 },
|
||||
{ "vpsrld", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
||||
|
||||
{ "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
|
||||
{ "vpsrlq", XMM, _XMM3 | _MEM | M_1to2, IMM8 },
|
||||
{ "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 },
|
||||
|
||||
{ "vpsravw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsravw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsravw", _ZMM, _ZMM, _MEM },
|
||||
|
||||
{ "vpsravd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsravd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsravd", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||
|
||||
{ "vpsravq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsravq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsravq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsllvw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsllvw", _ZMM, _ZMM, _MEM },
|
||||
|
||||
{ "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsllvd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsllvd", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||
|
||||
{ "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsllvq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsllvq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsrlvw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsrlvw", _ZMM, _ZMM, _MEM },
|
||||
|
||||
{ "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsrlvd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsrlvd", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||
|
||||
{ "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsrlvq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsrlvq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpshufb", _XMM | XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpshufb", XMM | XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpshufb", ZMM_KZ, _ZMM, _MEM },
|
||||
|
||||
{ "vpshufhw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 },
|
||||
{ "vpshufhw", XMM | XMM_KZ, _XMM | _MEM, IMM8 },
|
||||
{ "vpshufhw", ZMM_KZ, _MEM, IMM8 },
|
||||
|
||||
{ "vpshuflw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 },
|
||||
{ "vpshuflw", XMM | XMM_KZ, _XMM | _MEM, IMM8 },
|
||||
{ "vpshuflw", ZMM_KZ, _MEM, IMM8 },
|
||||
|
||||
{ "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
|
||||
{ "vpshufd", XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
|
||||
{ "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
||||
|
||||
{ "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpord", XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
||||
|
||||
{ "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
|
||||
{ "vporq", XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
|
||||
{ "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpxord", XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||
{ "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
||||
|
||||
{ "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
|
||||
{ "vpxorq", XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
|
||||
{ "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpsadbw", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpsadbw", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpsadbw", _ZMM, _ZMM, _MEM },
|
||||
|
||||
{ "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 | _MEM },
|
||||
{ "vpmuldq", XMM, _XMM, _XMM | M_1to2 | _MEM },
|
||||
{ "vpmuldq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpmulhrsw", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpmulhrsw", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpmulhrsw", ZMM_KZ, _ZMM, _MEM },
|
||||
|
||||
{ "vpmulhuw", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpmulhuw", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpmulhuw", ZMM_KZ, _ZMM, _MEM },
|
||||
|
||||
{ "vpmulhw", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpmulhw", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpmulhw", ZMM_KZ, _ZMM, _MEM },
|
||||
|
||||
{ "vpmullw", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpmullw", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpmullw", ZMM_KZ, _ZMM, _MEM },
|
||||
|
||||
{ "vpmulld", _XMM3, _XMM, M_1to4 | _MEM },
|
||||
{ "vpmulld", XMM, _XMM, M_1to4 | _MEM },
|
||||
{ "vpmulld", ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
||||
|
||||
{ "vpmullq", _XMM3, _XMM, M_1to2 | _MEM },
|
||||
{ "vpmullq", XMM, _XMM, M_1to2 | _MEM },
|
||||
{ "vpmullq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpmuludq", _XMM3, _XMM, M_1to2 | _MEM },
|
||||
{ "vpmuludq", XMM, _XMM, M_1to2 | _MEM },
|
||||
{ "vpmuludq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpunpckhbw", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpunpckhbw", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpunpckhbw", _ZMM, _ZMM, _MEM },
|
||||
|
||||
{ "vpunpckhwd", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpunpckhwd", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpunpckhwd", _ZMM, _ZMM, _MEM },
|
||||
|
||||
{ "vpunpckhdq", _XMM3, _XMM, M_1to4 | _MEM },
|
||||
{ "vpunpckhdq", XMM, _XMM, M_1to4 | _MEM },
|
||||
{ "vpunpckhdq", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||
|
||||
{ "vpunpckhqdq", _XMM3, _XMM, M_1to2 | _MEM },
|
||||
{ "vpunpckhqdq", XMM, _XMM, M_1to2 | _MEM },
|
||||
{ "vpunpckhqdq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vpunpcklbw", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpunpcklbw", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpunpcklbw", _ZMM, _ZMM, _MEM },
|
||||
|
||||
{ "vpunpcklwd", _XMM3, _XMM, _XMM | _MEM },
|
||||
{ "vpunpcklwd", XMM, _XMM, _XMM | _MEM },
|
||||
{ "vpunpcklwd", _ZMM, _ZMM, _MEM },
|
||||
|
||||
{ "vpunpckldq", _XMM3, _XMM, M_1to4 | _MEM },
|
||||
{ "vpunpckldq", XMM, _XMM, M_1to4 | _MEM },
|
||||
{ "vpunpckldq", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||
|
||||
{ "vpunpcklqdq", _XMM3, _XMM, M_1to2 | _MEM },
|
||||
{ "vpunpcklqdq", XMM, _XMM, M_1to2 | _MEM },
|
||||
{ "vpunpcklqdq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||
|
||||
{ "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
|
||||
|
@ -1126,7 +1129,7 @@ public:
|
|||
{ "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
|
||||
{ "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
|
||||
|
||||
{ "vextractps", REG32 | _MEM, _XMM3, IMM8 },
|
||||
{ "vextractps", REG32 | _MEM, XMM, IMM8 },
|
||||
|
||||
{ "vpermb", XMM_KZ, _XMM, _XMM | _MEM },
|
||||
{ "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||
|
@ -1175,7 +1178,7 @@ public:
|
|||
uint64_t xm;
|
||||
} tbl[] = {
|
||||
#ifdef XBYAK64
|
||||
{ "vinsertps", _XMM, _XMM, _XMM3 | _MEM },
|
||||
{ "vinsertps", XMM, _XMM, _XMM3 | _MEM },
|
||||
|
||||
{ "vshufpd", XMM_KZ, _XMM, M_1to2 | _MEM },
|
||||
{ "vshufpd", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||
|
@ -1208,14 +1211,14 @@ public:
|
|||
put(p.name, p.x1, p.x2, p.xm, IMM8);
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
put("vpextrb", _REG64 | _MEM, _XMM3, IMM8);
|
||||
put("vpextrw", _REG64 | _MEM, _XMM3, IMM8);
|
||||
put("vpextrd", _REG32 | _MEM, _XMM3, IMM8);
|
||||
put("vpextrq", _REG64 | _MEM, _XMM3, IMM8);
|
||||
put("vpinsrb", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
|
||||
put("vpinsrw", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
|
||||
put("vpinsrd", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
|
||||
put("vpinsrq", _XMM3, _XMM3, _REG64 | _MEM, IMM8);
|
||||
put("vpextrb", _REG64 | _MEM, XMM, IMM8);
|
||||
put("vpextrw", _REG64 | _MEM, XMM, IMM8);
|
||||
put("vpextrd", _REG32 | _MEM, XMM, IMM8);
|
||||
put("vpextrq", _REG64 | _MEM, XMM, IMM8);
|
||||
put("vpinsrb", XMM, _XMM3, _REG32 | _MEM, IMM8);
|
||||
put("vpinsrw", XMM, _XMM3, _REG32 | _MEM, IMM8);
|
||||
put("vpinsrd", XMM, _XMM3, _REG32 | _MEM, IMM8);
|
||||
put("vpinsrq", XMM, _XMM3, _REG64 | _MEM, IMM8);
|
||||
#endif
|
||||
}
|
||||
void put512_FMA()
|
||||
|
@ -1345,7 +1348,7 @@ public:
|
|||
} else if (suf == "ps") {
|
||||
mem = M_1to4;
|
||||
}
|
||||
put(p, _XMM3 | XMM_KZ, _XMM, mem | _MEM);
|
||||
put(p, XMM | XMM_KZ, _XMM, mem | _MEM);
|
||||
if (!sufTbl[j].supportYMM) continue;
|
||||
mem = 0;
|
||||
if (suf == "pd") {
|
||||
|
@ -1466,23 +1469,23 @@ public:
|
|||
put("vcvtqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
|
||||
put("vcvtqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
|
||||
|
||||
put("vcvtsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
|
||||
put("vcvtsd2si", REG32 | REG64, XMM | _MEM | XMM_ER);
|
||||
|
||||
put("vcvtsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
|
||||
put("vcvtsd2usi", REG32 | REG64, XMM | _MEM | XMM_ER);
|
||||
|
||||
put("vcvtsd2ss", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_ER);
|
||||
put("vcvtsd2ss", XMM_KZ, XMM, _XMM3 | _MEM | XMM_ER);
|
||||
|
||||
put("vcvtsi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtsi2sd", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtsi2sd", XMM, XMM_ER, REG64);
|
||||
|
||||
put("vcvtsi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtsi2ss", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtsi2ss", XMM, XMM_ER, REG32 | REG64);
|
||||
|
||||
put("vcvtss2sd", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_SAE);
|
||||
put("vcvtss2sd", XMM_KZ, XMM, _XMM3 | _MEM | XMM_SAE);
|
||||
|
||||
put("vcvtss2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
|
||||
put("vcvtss2si", REG32 | REG64, XMM | _MEM | XMM_ER);
|
||||
|
||||
put("vcvtss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
|
||||
put("vcvtss2usi", REG32 | REG64, XMM | _MEM | XMM_ER);
|
||||
|
||||
put("vcvtpd2dq", XMM_KZ, _XMM | M_xword | M_1to2);
|
||||
put("vcvtpd2dq", XMM_KZ, _YMM | M_yword | MY_1to4);
|
||||
|
@ -1516,13 +1519,13 @@ public:
|
|||
put("vcvttps2uqq", YMM_KZ, _XMM | _MEM | M_1to4);
|
||||
put("vcvttps2uqq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_SAE);
|
||||
|
||||
put("vcvttsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
|
||||
put("vcvttsd2si", REG32 | REG64, XMM | _MEM | XMM_SAE);
|
||||
|
||||
put("vcvttsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
|
||||
put("vcvttsd2usi", REG32 | REG64, XMM | _MEM | XMM_SAE);
|
||||
|
||||
put("vcvttss2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
|
||||
put("vcvttss2si", REG32 | REG64, XMM | _MEM | XMM_SAE);
|
||||
|
||||
put("vcvttss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
|
||||
put("vcvttss2usi", REG32 | REG64, XMM | _MEM | XMM_SAE);
|
||||
|
||||
put("vcvtudq2pd", XMM_KZ, _XMM | _MEM | M_1to2);
|
||||
put("vcvtudq2pd", YMM_KZ, _XMM | _MEM | M_1to4);
|
||||
|
@ -1540,10 +1543,10 @@ public:
|
|||
put("vcvtuqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
|
||||
put("vcvtuqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
|
||||
|
||||
put("vcvtusi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtusi2sd", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtusi2sd", XMM, XMM_ER, REG64);
|
||||
|
||||
put("vcvtusi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtusi2ss", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64);
|
||||
#endif
|
||||
}
|
||||
|
|
4
externals/xbyak/test/rip-label-imm.cpp
vendored
4
externals/xbyak/test/rip-label-imm.cpp
vendored
|
@ -40,8 +40,8 @@ struct Code : Xbyak::CodeGenerator {
|
|||
cmpss(xmm0, ptr[rip + label], 0);
|
||||
test(dword[rip + label], 33);
|
||||
bt(dword[rip + label ], 3);
|
||||
vblendpd(xmm0, dword[rip + label], 3);
|
||||
vpalignr(xmm0, qword[rip + label], 4);
|
||||
vblendpd(xmm0, xmm0, dword[rip + label], 3);
|
||||
vpalignr(xmm0, xmm0, qword[rip + label], 4);
|
||||
vextractf128(dword[rip + label], ymm3, 12);
|
||||
vperm2i128(ymm0, ymm1, qword[rip + label], 13);
|
||||
vcvtps2ph(ptr[rip + label], xmm2, 44);
|
||||
|
|
88
externals/xbyak/test/sf_test.cpp
vendored
88
externals/xbyak/test/sf_test.cpp
vendored
|
@ -129,6 +129,55 @@ struct Code : public Xbyak::CodeGenerator {
|
|||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
/*
|
||||
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
|
||||
*/
|
||||
// Emits a function that sums 13 consecutive 64-bit values starting at the
// pointer passed as its only argument. The StackFrame is created with one
// parameter and 13 temporaries (sf.t[0..12]).
void gen13()
|
||||
{
|
||||
StackFrame sf(this, 1, 13);
|
||||
// load a[i] into the i-th temporary register (elements are 8 bytes apart)
for (int i = 0; i < 13; i++) {
|
||||
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
// accumulate all temporaries into rax, the return register
mov(rax, sf.t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, sf.t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
same as gen13
|
||||
*/
|
||||
// Emits the same 13-element sum as gen13, but asks StackFrame for only 11
// temporaries plus the UseRCX | UseRDX flags, then appends rcx and rdx to a
// copy of the temporary pack so that t[0..12] covers 13 registers.
void gen14()
|
||||
{
|
||||
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
|
||||
Pack t = sf.t;
|
||||
// extend the pack with the two registers requested through the flags
t.append(rcx);
|
||||
t.append(rdx);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
return (1 << 15) - 1;
|
||||
*/
|
||||
// Emits a zero-argument function that uses 15 registers at once: the 14
// temporaries from StackFrame plus rax. Register i is loaded with 1 << i,
// all of them are added into the qword at [rsp], and that sum
// ((1 << 15) - 1) is returned in rax. The fourth StackFrame argument (8) is
// what provides the [rsp] slot used as the accumulator.
void gen15()
|
||||
{
|
||||
StackFrame sf(this, 0, 14, 8);
|
||||
Pack t = sf.t;
|
||||
t.append(rax);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
mov(t[i], 1 << i);
|
||||
}
|
||||
// accumulate in memory because every register in t (including rax) is live
mov(qword[rsp], 0);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
add(ptr[rsp], t[i]);
|
||||
}
|
||||
mov(rax, ptr[rsp]);
|
||||
}
|
||||
};
|
||||
|
||||
struct Code2 : Xbyak::CodeGenerator {
|
||||
|
@ -152,8 +201,14 @@ struct Code2 : Xbyak::CodeGenerator {
|
|||
add(rax, sf.p[i]);
|
||||
}
|
||||
}
|
||||
// Emits a function that sets up a StackFrame with the given parameter count,
// temporary count/flags and stack size in bytes, then returns the value of
// rsp inside that frame. Callers can inspect the returned address, e.g. to
// check its alignment.
void gen2(int pNum, int tNum, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||
mov(rax, rsp);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static int errNum = 0;
|
||||
void check(int x, int y)
|
||||
{
|
||||
|
@ -167,19 +222,19 @@ void verify(const Xbyak::uint8 *f, int pNum)
|
|||
{
|
||||
switch (pNum) {
|
||||
case 0:
|
||||
check(1, Xbyak::CastTo<int (*)()>(f)());
|
||||
check(1, reinterpret_cast<int (*)()>(f)());
|
||||
return;
|
||||
case 1:
|
||||
check(11, Xbyak::CastTo<int (*)(int)>(f)(10));
|
||||
check(11, reinterpret_cast<int (*)(int)>(f)(10));
|
||||
return;
|
||||
case 2:
|
||||
check(111, Xbyak::CastTo<int (*)(int, int)>(f)(10, 100));
|
||||
check(111, reinterpret_cast<int (*)(int, int)>(f)(10, 100));
|
||||
return;
|
||||
case 3:
|
||||
check(1111, Xbyak::CastTo<int (*)(int, int, int)>(f)(10, 100, 1000));
|
||||
check(1111, reinterpret_cast<int (*)(int, int, int)>(f)(10, 100, 1000));
|
||||
return;
|
||||
case 4:
|
||||
check(11111, Xbyak::CastTo<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
|
||||
check(11111, reinterpret_cast<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
|
||||
return;
|
||||
default:
|
||||
printf("ERR pNum=%d\n", pNum);
|
||||
|
@ -212,6 +267,15 @@ void testAll()
|
|||
const Xbyak::uint8 *f = code.getCurr();
|
||||
code.gen(pNum, tNum | opt, stackSize);
|
||||
verify(f, pNum);
|
||||
/*
|
||||
check rsp is 16-byte aligned if stackSize > 0
|
||||
*/
|
||||
if (stackSize > 0) {
|
||||
Code2 c2;
|
||||
c2.gen2(pNum, tNum | opt, stackSize);
|
||||
uint64_t addr = c2.getCode<uint64_t (*)()>()();
|
||||
check(addr % 16, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -268,6 +332,20 @@ void testPartial()
|
|||
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen12();
|
||||
check(24, f12(3, 5, 7, 9));
|
||||
|
||||
{
|
||||
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
|
||||
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||
code.gen13();
|
||||
check(91, f13(tbl));
|
||||
|
||||
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||
code.gen14();
|
||||
check(91, f14(tbl));
|
||||
}
|
||||
int (*f15)() = code.getCurr<int (*)()>();
|
||||
code.gen15();
|
||||
check((1 << 15) - 1, f15());
|
||||
}
|
||||
|
||||
void put(const Xbyak::util::Pack& p)
|
||||
|
|
108
externals/xbyak/xbyak/xbyak.h
vendored
108
externals/xbyak/xbyak/xbyak.h
vendored
|
@ -40,6 +40,8 @@
|
|||
// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
|
||||
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
|
||||
((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
|
||||
#include <unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::unordered_set
|
||||
#include <unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
|
||||
|
@ -49,16 +51,22 @@
|
|||
libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
|
||||
*/
|
||||
#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
|
||||
#include <tr1/unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||
#include <tr1/unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
|
||||
#include <unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||
#include <unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||
|
||||
#else
|
||||
#include <set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::set
|
||||
#include <map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
|
||||
|
@ -105,7 +113,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x5670 /* 0xABCD = A.BC(D) */
|
||||
VERSION = 0x5770 /* 0xABCD = A.BC(D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -178,7 +186,8 @@ enum {
|
|||
ERR_INVALID_ZERO,
|
||||
ERR_INVALID_RIP_IN_AUTO_GROW,
|
||||
ERR_INVALID_MIB_ADDRESS,
|
||||
ERR_INTERNAL
|
||||
ERR_INTERNAL,
|
||||
ERR_X2APIC_IS_NOT_SUPPORTED
|
||||
};
|
||||
|
||||
class Error : public std::exception {
|
||||
|
@ -240,6 +249,7 @@ public:
|
|||
"invalid rip in AutoGrow",
|
||||
"invalid mib address",
|
||||
"internal error",
|
||||
"x2APIC is not supported"
|
||||
};
|
||||
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
|
||||
return errTbl[err_];
|
||||
|
@ -617,6 +627,12 @@ struct RegRip {
|
|||
const Label* label_;
|
||||
bool isAddr_;
|
||||
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
|
||||
friend const RegRip operator+(const RegRip& r, int disp) {
|
||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||
}
|
||||
friend const RegRip operator-(const RegRip& r, int disp) {
|
||||
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
|
||||
}
|
||||
friend const RegRip operator+(const RegRip& r, sint64 disp) {
|
||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||
}
|
||||
|
@ -786,6 +802,7 @@ inline RegExp operator-(const RegExp& e, size_t disp)
|
|||
|
||||
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
|
||||
void *const AutoGrow = (void*)1; //-V566
|
||||
void *const DontSetProtectRWE = (void*)2; //-V566
|
||||
|
||||
class CodeArray {
|
||||
enum Type {
|
||||
|
@ -825,6 +842,7 @@ protected:
|
|||
size_t size_;
|
||||
bool isCalledCalcJmpAddress_;
|
||||
|
||||
bool useProtect() const { return alloc_->useProtect(); }
|
||||
/*
|
||||
allocate new memory and copy old data to the new area
|
||||
*/
|
||||
|
@ -848,7 +866,6 @@ protected:
|
|||
uint64 disp = i->getVal(top_);
|
||||
rewrite(i->codeOffset, disp, i->jmpSize);
|
||||
}
|
||||
if (alloc_->useProtect() && !protect(top_, size_, PROTECT_RWE)) throw Error(ERR_CANT_PROTECT);
|
||||
isCalledCalcJmpAddress_ = true;
|
||||
}
|
||||
public:
|
||||
|
@ -858,7 +875,7 @@ public:
|
|||
PROTECT_RE = 2 // read/exec
|
||||
};
|
||||
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
|
||||
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
|
||||
: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
|
||||
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
|
||||
, maxSize_(maxSize)
|
||||
, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
|
||||
|
@ -866,7 +883,7 @@ public:
|
|||
, isCalledCalcJmpAddress_(false)
|
||||
{
|
||||
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
|
||||
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, PROTECT_RWE)) {
|
||||
if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
|
||||
alloc_->free(top_);
|
||||
throw Error(ERR_CANT_PROTECT);
|
||||
}
|
||||
|
@ -874,10 +891,19 @@ public:
|
|||
virtual ~CodeArray()
|
||||
{
|
||||
if (isAllocType()) {
|
||||
if (alloc_->useProtect()) protect(top_, maxSize_, PROTECT_RW);
|
||||
if (useProtect()) setProtectModeRW(false);
|
||||
alloc_->free(top_);
|
||||
}
|
||||
}
|
||||
/*
	change the protection mode of the code buffer
	top_ and maxSize_ are handed to protect() together with mode
	@param mode [in] protection mode to apply
	@param throwException [in] if true, a failure throws Error(ERR_CANT_PROTECT);
		if false, a failure is only reported by the return value
	@return true if protect() succeeded, false otherwise
*/
bool setProtectMode(ProtectMode mode, bool throwException = true)
{
	if (!protect(top_, maxSize_, mode)) {
		if (throwException) throw Error(ERR_CANT_PROTECT);
		return false;
	}
	return true;
}
|
||||
bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
|
||||
bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
|
||||
void resetSize()
|
||||
{
|
||||
size_ = 0;
|
||||
|
@ -909,10 +935,10 @@ public:
|
|||
void dq(uint64 code) { db(code, 8); }
|
||||
const uint8 *getCode() const { return top_; }
|
||||
template<class F>
|
||||
const F getCode() const { return CastTo<F>(top_); }
|
||||
const F getCode() const { return reinterpret_cast<F>(top_); }
|
||||
const uint8 *getCurr() const { return &top_[size_]; }
|
||||
template<class F>
|
||||
const F getCurr() const { return CastTo<F>(&top_[size_]); }
|
||||
const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
|
||||
size_t getSize() const { return size_; }
|
||||
void setSize(size_t size)
|
||||
{
|
||||
|
@ -995,6 +1021,9 @@ public:
|
|||
size_t pageSize = sysconf(_SC_PAGESIZE);
|
||||
size_t iaddr = reinterpret_cast<size_t>(addr);
|
||||
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
||||
#ifndef NDEBUG
|
||||
if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
|
||||
#endif
|
||||
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
||||
#else
|
||||
return true;
|
||||
|
@ -1115,6 +1144,7 @@ public:
|
|||
Label(const Label& rhs);
|
||||
Label& operator=(const Label& rhs);
|
||||
~Label();
|
||||
void clear() { mgr = 0; id = 0; }
|
||||
int getId() const { return id; }
|
||||
const uint8 *getAddress() const;
|
||||
|
||||
|
@ -1153,6 +1183,7 @@ class LabelManager {
|
|||
};
|
||||
typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
|
||||
typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
|
||||
typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
|
||||
|
||||
CodeArray *base_;
|
||||
// global : stateList_.front(), local : stateList_.back()
|
||||
|
@ -1160,6 +1191,7 @@ class LabelManager {
|
|||
mutable int labelId_;
|
||||
ClabelDefList clabelDefList_;
|
||||
ClabelUndefList clabelUndefList_;
|
||||
LabelPtrList labelPtrList_;
|
||||
|
||||
int getId(const Label& label) const
|
||||
{
|
||||
|
@ -1208,9 +1240,14 @@ class LabelManager {
|
|||
return true;
|
||||
}
|
||||
friend class Label;
|
||||
void incRefCount(int id) { clabelDefList_[id].refCount++; }
|
||||
void decRefCount(int id)
|
||||
void incRefCount(int id, Label *label)
|
||||
{
|
||||
clabelDefList_[id].refCount++;
|
||||
labelPtrList_.insert(label);
|
||||
}
|
||||
void decRefCount(int id, Label *label)
|
||||
{
|
||||
labelPtrList_.erase(label);
|
||||
ClabelDefList::iterator i = clabelDefList_.find(id);
|
||||
if (i == clabelDefList_.end()) return;
|
||||
if (i->second.refCount == 1) {
|
||||
|
@ -1229,11 +1266,23 @@ class LabelManager {
|
|||
#endif
|
||||
return !list.empty();
|
||||
}
|
||||
// detach all labels linked to LabelManager
|
||||
void resetLabelPtrList()
|
||||
{
|
||||
for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
|
||||
(*i)->clear();
|
||||
}
|
||||
labelPtrList_.clear();
|
||||
}
|
||||
public:
|
||||
LabelManager()
|
||||
{
|
||||
reset();
|
||||
}
|
||||
~LabelManager()
|
||||
{
|
||||
resetLabelPtrList();
|
||||
}
|
||||
void reset()
|
||||
{
|
||||
base_ = 0;
|
||||
|
@ -1243,6 +1292,7 @@ public:
|
|||
stateList_.push_back(SlabelState());
|
||||
clabelDefList_.clear();
|
||||
clabelUndefList_.clear();
|
||||
resetLabelPtrList();
|
||||
}
|
||||
void enterLocal()
|
||||
{
|
||||
|
@ -1275,10 +1325,11 @@ public:
|
|||
SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
|
||||
define_inner(st.defList, st.undefList, label, base_->getSize());
|
||||
}
|
||||
void defineClabel(const Label& label)
|
||||
// define label at the offset base_->getSize() and register it in labelPtrList_
void defineClabel(Label& label)
{
	const int id = getId(label);
	const size_t offset = base_->getSize();
	define_inner(clabelDefList_, clabelUndefList_, id, offset);
	// the label now belongs to this manager
	label.mgr = this;
	labelPtrList_.insert(&label);
}
|
||||
void assign(Label& dst, const Label& src)
|
||||
{
|
||||
|
@ -1286,6 +1337,7 @@ public:
|
|||
if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
|
||||
define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
|
||||
dst.mgr = this;
|
||||
labelPtrList_.insert(&dst);
|
||||
}
|
||||
bool getOffset(size_t *offset, std::string& label) const
|
||||
{
|
||||
|
@ -1333,19 +1385,19 @@ inline Label::Label(const Label& rhs)
|
|||
{
|
||||
id = rhs.id;
|
||||
mgr = rhs.mgr;
|
||||
if (mgr) mgr->incRefCount(id);
|
||||
if (mgr) mgr->incRefCount(id, this);
|
||||
}
|
||||
inline Label& Label::operator=(const Label& rhs)
|
||||
{
|
||||
if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
|
||||
id = rhs.id;
|
||||
mgr = rhs.mgr;
|
||||
if (mgr) mgr->incRefCount(id);
|
||||
if (mgr) mgr->incRefCount(id, this);
|
||||
return *this;
|
||||
}
|
||||
inline Label::~Label()
|
||||
{
|
||||
if (id && mgr) mgr->decRefCount(id);
|
||||
if (id && mgr) mgr->decRefCount(id, this);
|
||||
}
|
||||
inline const uint8* Label::getAddress() const
|
||||
{
|
||||
|
@ -1463,6 +1515,7 @@ private:
|
|||
T_B64 = 1 << 27, // m64bcst
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
T_XXX
|
||||
};
|
||||
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
||||
|
@ -1500,7 +1553,7 @@ private:
|
|||
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
|
||||
return v;
|
||||
}
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0)
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
|
||||
{
|
||||
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
|
||||
int w = (type & T_EW1) ? 1 : 0;
|
||||
|
@ -1543,7 +1596,7 @@ private:
|
|||
}
|
||||
}
|
||||
}
|
||||
bool Vp = !(v ? v->isExtIdx2() : 0);
|
||||
bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
|
||||
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
|
||||
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
||||
db(0x62);
|
||||
|
@ -1935,10 +1988,11 @@ private:
|
|||
const Address& addr = op2.getAddress();
|
||||
const RegExp& regExp = addr.getRegExp();
|
||||
const Reg& base = regExp.getBase();
|
||||
const Reg& index = regExp.getIndex();
|
||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||
int disp8N = 0;
|
||||
bool x = regExp.getIndex().isExtIdx();
|
||||
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
||||
bool x = index.isExtIdx();
|
||||
if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
||||
int aaa = addr.getOpmaskIdx();
|
||||
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
|
||||
bool b = false;
|
||||
|
@ -1946,8 +2000,8 @@ private:
|
|||
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
|
||||
b = true;
|
||||
}
|
||||
int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0;
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
|
||||
int VL = regExp.isVsib() ? index.getBit() : 0;
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
|
||||
} else {
|
||||
vex(r, base, p1, type, code, x);
|
||||
}
|
||||
|
@ -2147,7 +2201,8 @@ public:
|
|||
const Segment es, cs, ss, ds, fs, gs;
|
||||
#endif
|
||||
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
||||
void L(const Label& label) { labelMgr_.defineClabel(label); }
|
||||
void L(Label& label) { labelMgr_.defineClabel(label); }
|
||||
Label L() { Label label; L(label); return label; }
|
||||
void inLocalLabel() { labelMgr_.enterLocal(); }
|
||||
void outLocalLabel() { labelMgr_.leaveLocal(); }
|
||||
/*
|
||||
|
@ -2178,7 +2233,7 @@ public:
|
|||
// call(function pointer)
|
||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||
template<class Ret, class... Params>
|
||||
void call(Ret(*func)(Params...)) { call(CastTo<const void*>(func)); }
|
||||
void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
|
||||
#endif
|
||||
void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
|
||||
|
||||
|
@ -2436,11 +2491,16 @@ public:
|
|||
MUST call ready() to complete generating code if you use AutoGrow mode.
|
||||
In the other modes calling ready() is optional; there it only checks that no undefined label remains (it throws if hasUndefinedLabel() is true).
|
||||
*/
|
||||
void ready()
|
||||
void ready(ProtectMode mode = PROTECT_RWE)
|
||||
{
|
||||
if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
|
||||
if (isAutoGrow()) calcJmpAddress();
|
||||
if (isAutoGrow()) {
|
||||
calcJmpAddress();
|
||||
if (useProtect()) setProtectMode(mode);
|
||||
}
|
||||
}
|
||||
// set read/exec
|
||||
void readyRE() { return ready(PROTECT_RE); }
|
||||
#ifdef XBYAK_TEST
|
||||
void dump(bool doClear = true)
|
||||
{
|
||||
|
|
24
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
24
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "5.67"; }
|
||||
const char *getVersionString() const { return "5.77"; }
|
||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||
|
@ -1023,7 +1023,7 @@ void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
|||
void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
|
||||
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
|
||||
void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
|
||||
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
|
||||
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
|
||||
void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
|
||||
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
|
||||
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
|
||||
|
@ -1206,28 +1206,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm,
|
|||
void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
|
||||
void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
|
||||
void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
|
||||
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
|
||||
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
|
||||
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
|
||||
void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
|
||||
void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
|
||||
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
|
||||
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
|
||||
void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
|
||||
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
|
||||
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
|
||||
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
|
||||
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
|
||||
void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
|
||||
void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
|
||||
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
|
||||
void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
|
||||
void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
|
||||
|
|
199
externals/xbyak/xbyak/xbyak_util.h
vendored
199
externals/xbyak/xbyak/xbyak_util.h
vendored
|
@ -9,6 +9,11 @@
|
|||
*/
|
||||
#include "xbyak.h"
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#define XBYAK_INTEL_CPU_SPECIFIC
|
||||
#endif
|
||||
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||
static inline __declspec(naked) void __cpuid(int[4], int)
|
||||
|
@ -47,14 +52,30 @@
|
|||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Xbyak { namespace util {
|
||||
|
||||
/*
	topology level selector for Cpu::getNumCores()
	the values are compared with the level type read from cpuid leaf 0xB
	and (value - 1) is used as the index of the per-level core count
*/
enum IntelCpuTopologyLevel {
	SmtLevel = 1,
	CoreLevel = 2
};
|
||||
|
||||
/**
|
||||
CPU detection class
|
||||
*/
|
||||
class Cpu {
|
||||
uint64 type_;
|
||||
//system topology
|
||||
bool x2APIC_supported_;
|
||||
static const size_t maxTopologyLevels = 2;
|
||||
unsigned int numCores_[maxTopologyLevels];
|
||||
|
||||
static const unsigned int maxNumberCacheLevels = 10;
|
||||
unsigned int dataCacheSize_[maxNumberCacheLevels];
|
||||
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
|
||||
unsigned int dataCacheLevels_;
|
||||
|
||||
unsigned int get32bitAsBE(const char *x) const
|
||||
{
|
||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||
|
@ -65,7 +86,7 @@ class Cpu {
|
|||
}
|
||||
void setFamily()
|
||||
{
|
||||
unsigned int data[4];
|
||||
unsigned int data[4] = {};
|
||||
getCpuid(1, data);
|
||||
stepping = data[0] & mask(4);
|
||||
model = (data[0] >> 4) & mask(4);
|
||||
|
@ -88,6 +109,39 @@ class Cpu {
|
|||
{
|
||||
return (val >> base) & ((1u << (end - base)) - 1);
|
||||
}
|
||||
void setNumCores()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
|
||||
unsigned int data[4] = {};
|
||||
|
||||
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
/*
|
||||
if leaf 11 exists(x2APIC is supported),
|
||||
we use it to get the number of smt cores and cores on socket
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
x2APIC_supported_ = true;
|
||||
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
|
||||
getCpuidEx(0xB, i, data);
|
||||
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
||||
if (level == SmtLevel || level == CoreLevel) {
|
||||
numCores_[level - 1] = extractBit(data[1], 0, 15);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
Failed to deremine num of cores without x2APIC support.
|
||||
TODO: USE initial APIC ID to determine ncores.
|
||||
*/
|
||||
numCores_[SmtLevel - 1] = 0;
|
||||
numCores_[CoreLevel - 1] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
void setCacheHierarchy()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
|
@ -96,21 +150,12 @@ class Cpu {
|
|||
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||
const unsigned int UNIFIED_CACHE = 3;
|
||||
unsigned int smt_width = 0;
|
||||
unsigned int n_cores = 0;
|
||||
unsigned int data[4];
|
||||
unsigned int logical_cores = 0;
|
||||
unsigned int data[4] = {};
|
||||
|
||||
/*
|
||||
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
|
||||
If x2APIC is supported, these are the only correct numbers.
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
|
||||
smt_width = data[1] & 0x7FFF;
|
||||
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
|
||||
n_cores = data[1] & 0x7FFF;
|
||||
if (x2APIC_supported_) {
|
||||
smt_width = numCores_[0];
|
||||
logical_cores = numCores_[1];
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -118,29 +163,29 @@ class Cpu {
|
|||
the first level of data cache is not shared (which is the
|
||||
case for every existing architecture) and use this to
|
||||
determine the SMT width for arch not supporting leaf 11.
|
||||
when leaf 4 reports a number of core less than n_cores
|
||||
when leaf 4 reports a number of core less than numCores_
|
||||
on socket reported by leaf 11, then it is a correct number
|
||||
of cores not an upperbound.
|
||||
*/
|
||||
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
|
||||
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
|
||||
getCpuidEx(0x4, i, data);
|
||||
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||
if (cacheType == NO_CACHE) break;
|
||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||
unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (n_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
|
||||
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||
}
|
||||
assert(nb_logical_cores != 0);
|
||||
data_cache_size[data_cache_levels] =
|
||||
assert(actual_logical_cores != 0);
|
||||
dataCacheSize_[dataCacheLevels_] =
|
||||
(extractBit(data[1], 22, 31) + 1)
|
||||
* (extractBit(data[1], 12, 21) + 1)
|
||||
* (extractBit(data[1], 0, 11) + 1)
|
||||
* (data[2] + 1);
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||
assert(smt_width != 0);
|
||||
cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u);
|
||||
data_cache_levels++;
|
||||
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
|
||||
dataCacheLevels_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -154,22 +199,25 @@ public:
|
|||
int displayFamily; // family + extFamily
|
||||
int displayModel; // model + extModel
|
||||
|
||||
// may I move these members into private?
|
||||
static const unsigned int maxNumberCacheLevels = 10;
|
||||
unsigned int data_cache_size[maxNumberCacheLevels];
|
||||
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
|
||||
unsigned int data_cache_levels;
|
||||
/*
	get the number of cores for the given topology level
	@param level [in] SmtLevel : the count stored for the SMT level
		CoreLevel : the count stored for the core level divided by the SMT level count
	@return the value described above
	@note throw Error(ERR_X2APIC_IS_NOT_SUPPORTED) if x2APIC_supported_ is false,
		if level is neither SmtLevel nor CoreLevel, or if the SMT level count is 0.
		The SMT level count stays 0 when leaf 0xB did not report that level
		(setNumCores() notes that a hypervisor can zero the leaf out);
		dividing by it would be a division by zero.
*/
unsigned int getNumCores(IntelCpuTopologyLevel level) const {
	if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
	switch (level) {
	case SmtLevel: return numCores_[SmtLevel - 1];
	case CoreLevel: {
		const unsigned int smtCount = numCores_[SmtLevel - 1];
		// no usable topology information : report it instead of dividing by zero
		if (smtCount == 0) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
		return numCores_[CoreLevel - 1] / smtCount;
	}
	default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
	}
}
|
||||
|
||||
unsigned int getDataCacheLevels() const { return data_cache_levels; }
|
||||
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
|
||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||
{
|
||||
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||
return cores_sharing_data_cache[i];
|
||||
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||
return coresSharignDataCache_[i];
|
||||
}
|
||||
unsigned int getDataCacheSize(unsigned int i) const
|
||||
{
|
||||
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||
return data_cache_size[i];
|
||||
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||
return dataCacheSize_[i];
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -177,22 +225,34 @@ public:
|
|||
*/
|
||||
/*
	call cpuid with eax = eaxIn
	@param eaxIn [in] value of eax
	@param data [out] receives the four values produced by cpuid
	@note data is not modified if XBYAK_INTEL_CPU_SPECIFIC is not defined
*/
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
{
#if !defined(XBYAK_INTEL_CPU_SPECIFIC)
	// cpuid is not used here; only avoid unused-parameter warnings
	(void)eaxIn;
	(void)data;
#elif defined(_MSC_VER)
	__cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
	__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
}
|
||||
/*
	call cpuid with eax = eaxIn and ecx = ecxIn
	@param eaxIn [in] value of eax
	@param ecxIn [in] value of ecx
	@param data [out] receives the four values produced by cpuid
	@note data is not modified if XBYAK_INTEL_CPU_SPECIFIC is not defined
*/
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
{
#if !defined(XBYAK_INTEL_CPU_SPECIFIC)
	// cpuid is not used here; only avoid unused-parameter warnings
	(void)eaxIn;
	(void)ecxIn;
	(void)data;
#elif defined(_MSC_VER)
	__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
#else
	__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
#endif
}
|
||||
static inline uint64 getXfeature()
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
return _xgetbv(0);
|
||||
#else
|
||||
|
@ -202,6 +262,9 @@ public:
|
|||
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
typedef uint64 Type;
|
||||
|
||||
|
@ -271,9 +334,13 @@ public:
|
|||
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
, data_cache_levels(0)
|
||||
, x2APIC_supported_(false)
|
||||
, numCores_()
|
||||
, dataCacheSize_()
|
||||
, coresSharignDataCache_()
|
||||
, dataCacheLevels_(0)
|
||||
{
|
||||
unsigned int data[4];
|
||||
unsigned int data[4] = {};
|
||||
const unsigned int& EAX = data[0];
|
||||
const unsigned int& EBX = data[1];
|
||||
const unsigned int& ECX = data[2];
|
||||
|
@ -363,6 +430,7 @@ public:
|
|||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
}
|
||||
setFamily();
|
||||
setNumCores();
|
||||
setCacheHierarchy();
|
||||
}
|
||||
void putFamily() const
|
||||
|
@ -381,6 +449,7 @@ class Clock {
|
|||
public:
|
||||
static inline uint64 getRdtsc()
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
|
@ -388,6 +457,10 @@ public:
|
|||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
Clock()
|
||||
: clock_(0)
|
||||
|
@ -416,7 +489,7 @@ const int UseRCX = 1 << 6;
|
|||
const int UseRDX = 1 << 7;
|
||||
|
||||
class Pack {
|
||||
static const size_t maxTblNum = 10;
|
||||
static const size_t maxTblNum = 15;
|
||||
const Xbyak::Reg64 *tbl_[maxTblNum];
|
||||
size_t n_;
|
||||
public:
|
||||
|
@ -476,7 +549,7 @@ public:
|
|||
const Xbyak::Reg64& operator[](size_t n) const
|
||||
{
|
||||
if (n >= n_) {
|
||||
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
|
||||
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
}
|
||||
return *tbl_[n];
|
||||
|
@ -518,6 +591,7 @@ class StackFrame {
|
|||
static const int rcxPos = 3;
|
||||
static const int rdxPos = 2;
|
||||
#endif
|
||||
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
|
||||
Xbyak::CodeGenerator *code_;
|
||||
int pNum_;
|
||||
int tNum_;
|
||||
|
@ -527,7 +601,7 @@ class StackFrame {
|
|||
int P_;
|
||||
bool makeEpilog_;
|
||||
Xbyak::Reg64 pTbl_[4];
|
||||
Xbyak::Reg64 tTbl_[10];
|
||||
Xbyak::Reg64 tTbl_[maxRegNum];
|
||||
Pack p_;
|
||||
Pack t_;
|
||||
StackFrame(const StackFrame&);
|
||||
|
@ -539,7 +613,7 @@ public:
|
|||
make stack frame
|
||||
@param sf [in] this
|
||||
@param pNum [in] num of function parameter(0 <= pNum <= 4)
|
||||
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
|
||||
@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
|
||||
@param stackSizeByte [in] local stack size
|
||||
@param makeEpilog [in] automatically call close() if true
|
||||
|
||||
|
@ -566,27 +640,17 @@ public:
|
|||
using namespace Xbyak;
|
||||
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
|
||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||
if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
|
||||
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
|
||||
const Reg64& _rsp = code->rsp;
|
||||
const AddressFrame& _ptr = code->ptr;
|
||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
P_ = saveNum_ + (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->push(Reg64(tbl[i]));
|
||||
}
|
||||
P_ = (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
P_ *= 8;
|
||||
if (P_ > 0) code->sub(_rsp, P_);
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#endif
|
||||
int pos = 0;
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
|
||||
|
@ -607,21 +671,11 @@ public:
|
|||
{
|
||||
using namespace Xbyak;
|
||||
const Reg64& _rsp = code_->rsp;
|
||||
const AddressFrame& _ptr = code_->ptr;
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#endif
|
||||
if (P_ > 0) code_->add(_rsp, P_);
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
|
||||
}
|
||||
|
||||
if (callRet) code_->ret();
|
||||
}
|
||||
|
@ -633,9 +687,6 @@ public:
|
|||
} catch (std::exception& e) {
|
||||
printf("ERR:StackFrame %s\n", e.what());
|
||||
exit(1);
|
||||
} catch (...) {
|
||||
printf("ERR:StackFrame otherwise\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
private:
|
||||
|
@ -654,7 +705,7 @@ private:
|
|||
}
|
||||
int getRegIdx(int& pos) const
|
||||
{
|
||||
assert(pos < 14);
|
||||
assert(pos < maxRegNum);
|
||||
using namespace Xbyak;
|
||||
const int *tbl = getOrderTbl();
|
||||
int r = tbl[pos++];
|
||||
|
|
Loading…
Reference in a new issue