From 080b4b3affbdc1d56f2f8230663725413ab03d21 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 22 Apr 2020 20:59:14 +0100 Subject: [PATCH] Squashed 'externals/xbyak/' changes from 671fc805..4a6fac8a 4a6fac8a update version to 5.77 801cf3fd cosmetic change of getNumCores d397e824 fix number of cores that share LLC cache a669e092 support non-intel-cpu visual studio af5f422e Merge branch 'fenghaitao-guard_x86' into develop 9b98dc17 Guard x86 specific codes with "#if defined(__i386__) || defined(__x86_64__)" dd4173e1 move some member variables input private f72646a7 update version 4612528f format change 4b95e862 Merge branch 'shelleygoel-master' 4c262fa6 add functionality to get num of cores using x2APIC ID bc70e7e1 recover Xbyak::CastTo d09a230f unlink Label when LabelManager is destroyed 973e8597 update version afdb9fe9 Xbyak::CastTo is removed b011aca4 add RegRip +/- int acae93cd increase max temp regs for StackFrame ea4e3562 util::StackFrame uses push/pop instead of mov 42462ef9 use evex encoding for vpslld/vpslldq/vpsraw/...(reg, mem, imm); da9117a9 update version of readme.md d35f4fb7 fix the encoding of vinsertps for disp8N 1de435ed bf uses Label class 613922bd add Label L() for convenience 43e15583 fix typo 93579ee6 add protect-re.cpp 60004b5c fix url of protect-re.cpp 348b2709 fix typo of doc f34f6ed5 update manual 232110be update test 82b78bf0 add setProtectMode dd8b290f put warning message if pageSize != 4096 64775ca2 a little refactoring 7c3e7b85 fix wrong VSIB encoding with idx >= 16 git-subtree-dir: externals/xbyak git-subtree-split: 4a6fac8ade404f667b94170f713367fe7da2a852 --- gen/avx_type.hpp | 5 + gen/gen_code.cpp | 22 +- readme.md | 479 +++++++++++++++++++++++------------------ readme.txt | 52 ++++- sample/bf.cpp | 32 ++- sample/protect-re.cpp | 70 ++++++ sample/static_buf.cpp | 2 +- sample/test0.cpp | 9 +- sample/test_util.cpp | 3 + sample/toyvm.cpp | 2 +- test/jmp.cpp | 123 ++++++++++- test/make_512.cpp | 245 ++++++++++----------- 
test/rip-label-imm.cpp | 4 +- test/sf_test.cpp | 88 +++++++- xbyak/xbyak.h | 108 +++++++--- xbyak/xbyak_mnemonic.h | 24 +-- xbyak/xbyak_util.h | 215 +++++++++++------- 17 files changed, 994 insertions(+), 489 deletions(-) create mode 100644 sample/protect-re.cpp diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp index 6f51166f..a659699e 100644 --- a/gen/avx_type.hpp +++ b/gen/avx_type.hpp @@ -37,6 +37,7 @@ T_B64 = 1 << 27, // m64bcst T_M_K = 1 << 28, // mem{k} T_VSIB = 1 << 29, + T_MEM_EVEX = 1 << 30, // use evex if mem T_XXX }; @@ -161,5 +162,9 @@ std::string type2String(int type) if (!str.empty()) str += " | "; str += "T_VSIB"; } + if (type & T_MEM_EVEX) { + if (!str.empty()) str += " | "; + str += "T_MEM_EVEX"; + } return str; } diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 37877bfe..43984c0c 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -76,7 +76,7 @@ void putX_X_XM(bool omitOnly) { 0xC2, "cmpss", T_0F | T_F3, true, true, 2 }, { 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 }, { 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 }, - { 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0, true, true, 2 }, + { 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, true, true, 2 }, { 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, { 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, @@ -1491,16 +1491,16 @@ void put() int idx; int type; } tbl[] = { - { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX }, - { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX }, - { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX }, - { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, - { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 }, - { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX }, - { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | 
T_B32 }, - { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX }, - { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, - { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 }, + { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, + { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 }, + { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, + { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, + { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; diff --git a/readme.md b/readme.md index 2c41e822..480c0c16 100644 --- a/readme.md +++ b/readme.md @@ -1,107 +1,121 @@ -Xbyak 5.67 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ -============= +# Xbyak 5.77 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ -Abstract -------------- +## Abstract This is a header file which enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic. -Feature -------------- -header file only -you can use Xbyak's functions at once if xbyak.h is included. +## Feature +* header file only +* Intel/MASM like syntax +* fully support AVX-512 -### Supported Instructions Sets +**Note**: Xbyak uses and(), or(), xor(), not() functions, so `-fno-operator-names` option is necessary for gcc/clang. -MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(*partial*)/AVX/AVX2/FMA/VEX-encoded GPR/AVX-512 +Or define `XBYAK_NO_OP_NAMES` before including `xbyak.h` and use and_(), or_(), xor_(), not_() instead of them. 
+ +and_(), or_(), xor_(), not_() are always available. + +`XBYAK_NO_OP_NAMES` will be defined in the future version. ### Supported OS -* Windows Xp, Vista, Windows 7(32bit, 64bit) +* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit) * Linux(32bit, 64bit) -* Intel Mac OSX +* Intel macOS ### Supported Compilers -* Visual Studio C++ VC2012 or later -* gcc 4.7 or later -* clang 3.3 -* cygwin gcc 4.5.3 -* icc 7.2 +Almost all C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin. ->Note: Xbyak uses and(), or(), xor(), not() functions, so "-fno-operator-names" option is required on gcc. -Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_() instead of them. -and_(), or_(), xor_(), not_() are always available. +## Install -Install -------------- - -The following files are necessary. Please add the path to your compile directories. +The following files are necessary. Please add the path to your compile directory. * xbyak.h * xbyak_mnemonic.h +* xbyak_util.h Linux: +``` +make install +``` - make install +These files are copied into `/usr/local/include/xbyak`. -These files are copied into /usr/local/include/xbyak +## How to use it -New Feature -------------- +Inherit `Xbyak::CodeGenerator` class and make the class method. +``` +#define XBYAK_NO_OP_NAMES +#include <xbyak/xbyak.h> -Add support for AVX-512 instruction set. +struct Code : Xbyak::CodeGenerator { + Code(int x) + { + mov(eax, x); + ret(); + } +}; +``` +Make an instance of the class and get the function +pointer by calling `getCode()` and call it. +``` +Code c(5); +int (*f)() = c.getCode<int (*)()>(); +printf("ret=%d\n", f()); // ret = 5 +``` -Syntax -------------- - -Make Xbyak::CodeGenerator and make the class method and get the function -pointer by calling cgetCode() and casting the return value.
- - NASM Xbyak - mov eax, ebx --> mov(eax, ebx); - inc ecx inc(ecx); - ret --> ret(); - -### Addressing - - (ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement] - [rip + 32bit disp] ; x64 only - - NASM Xbyak - mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]); - test byte [esp], 4 --> test (byte [esp], 4); - - -How to use Selector(Segment Register) - ->Note: Segment class is not derived from Operand. +## Syntax +Similar to MASM/NASM syntax with parentheses. ``` -mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]); +NASM Xbyak +mov eax, ebx --> mov(eax, ebx); +inc ecx inc(ecx); +ret --> ret(); +``` + +## Addressing +Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory, +otherwise use `ptr`. + +``` +(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement] + [rip + 32bit disp] ; x64 only + +NASM Xbyak +mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]); +mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]); +test byte [esp], 4 --> test(byte [esp], 4); +inc qword [rax] --> inc(qword [rax]); +``` +**Note**: `qword`, ... are member variables, then don't use `dword` as unsigned int type. + +### How to use Selector (Segment Register) +``` +mov eax, [fs:eax] --> putSeg(fs); + mov(eax, ptr [eax]); mov ax, cs --> mov(ax, cs); ``` +**Note**: Segment class is not derived from `Operand`. ->you can use ptr for almost memory access unless you specify the size of memory. +## AVX ->dword, word and byte are member variables, then don't use dword as unsigned int, for example. - -### AVX - - vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3 - vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory - vgatherdpd(xmm1, ptr [ebp+123+xmm2*4], xmm3); - -*Remark* -The omitted destination syntax as the following ss disabled. 
``` - vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3 +vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3 +vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory +vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3); ``` -define `XBYAK_ENABLE_OMITTED_OPERAND` if you use it for backward compatibility. + +**Note**: +If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility. But the newer version will not support it. +``` +vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3 +``` -### AVX-512 +## AVX-512 ``` vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30); @@ -130,97 +144,122 @@ vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5) vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit ``` -Remark -* k1, ..., k7 are new opmask registers. +### Remark +* `k1`, ..., `k7` are opmask registers. * use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively. * `k4 | k3` is different from `k3 | k4`. * use `ptr_b` for broadcast `{1toX}`. X is automatically determined. -* specify xword/yword/zword(_b) for m128/m256/m512 if necessary. +* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary. -### Label +## Label +Two kinds of Label are supported. (String literal and Label class). - L("L1"); - jmp ("L1"); +### String literal +``` +L("L1"); + jmp("L1"); - jmp ("L2"); - ... - a few mnemonics(8-bit displacement jmp) - ... - L("L2"); + jmp("L2"); + ... + a few mnemonics (8-bit displacement jmp) + ... +L("L2"); - jmp ("L3", T_NEAR); - ... - a lot of mnemonics(32-bit displacement jmp) - ... - L("L3"); + jmp("L3", T_NEAR); + ... + a lot of mnemonics (32-bit displacement jmp) + ... 
+L("L3"); +``` ->Call hasUndefinedLabel() to verify your code has no undefined label. -> you can use a label for immediate value of mov like as mov (eax, "L2"); +* Call `hasUndefinedLabel()` to verify your code has no undefined label. +* you can use a label for immediate value of mov like as `mov(eax, "L2")`. -#### 1. support @@, @f, @b like MASM +### Support `@@`, `@f`, `@b` like MASM - L("@@"); // - jmp("@b"); // jmp to - jmp("@f"); // jmp to - L("@@"); // - jmp("@b"); // jmp to - mov(eax, "@b"); - jmp(eax); // jmp to +``` +L("@@"); // + jmp("@b"); // jmp to + jmp("@f"); // jmp to +L("@@"); // + jmp("@b"); // jmp to + mov(eax, "@b"); + jmp(eax); // jmp to +``` -#### 2. localization of label by calling inLocalLabel(), outLocallabel(). +### Local label -labels begining of period between inLocalLabel() and outLocalLabel() -are dealed with local label. -inLocalLabel() and outLocalLabel() can be nested. +Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()` +are treated as a local label. +`inLocalLabel()` and `outLocalLabel()` can be nested. - void func1() - { - inLocalLabel(); - L(".lp"); // ; local label - ... - jmp(".lp"); // jmpt to - L("aaa"); // global label - outLocalLabel(); - } +``` +void func1() +{ + inLocalLabel(); + L(".lp"); // ; local label + ... + jmp(".lp"); // jmp to + L("aaa"); // global label + outLocalLabel(); - void func2() - { - inLocalLabel(); - L(".lp"); // ; local label - func1(); - jmp(".lp"); // jmp to - inLocalLabel(); - } + inLocalLabel(); + L(".lp"); // ; local label + func1(); + jmp(".lp"); // jmp to + inLocalLabel(); + jmp("aaa"); // jmp to +} +``` ### Label class -L() and jxx() functions support a new Label class. +`L()` and `jxx()` support Label class. - Label label1, label2; - L(label1); - ... - jmp(label1); - ... - jmp(label2); - ... - L(label2); +``` + Xbyak::Label label1, label2; +L(label1); + ... + jmp(label1); + ... + jmp(label2); + ... 
+L(label2); +``` -Moreover, assignL(dstLabel, srcLabel) method binds dstLabel with srcLabel. +Use `putL` for jmp table +``` + Label labelTbl, L0, L1, L2; + mov(rax, labelTbl); + // rdx is an index of jump table + jmp(ptr [rax + rdx * sizeof(void*)]); +L(labelTbl); + putL(L0); + putL(L1); + putL(L2); +L(L0); + .... +L(L1); + .... +``` - Label label1, label2; - L(label1); - ... - jmp(label2); - ... - assignL(label2, label1); // label2 <= label1 +`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel. -The above jmp opecode jumps label1. +``` + Label label2; + Label label1 = L(); // make label1 ; same to Label label1; L(label1); + ... + jmp(label2); // label2 is not determined here + ... + assignL(label2, label1); // label2 <- label1 +``` +The `jmp` in the above code jumps to label1 assigned by `assignL`. -* Restriction: -* srcLabel must be used in L(). -* dstLabel must not be used in L(). +**Note**: +* srcLabel must be used in `L()`. +* dstLabel must not be used in `L()`. -Label::getAddress() returns the address specified by the label instance and 0 if not specified. +`Label::getAddress()` returns the address specified by the label instance and 0 if not specified. ``` // not AutoGrow mode Label label; @@ -229,7 +268,7 @@ L(label); assert(label.getAddress() == getCurr()); ``` -### Rip +### Rip ; relative addressing ``` Label label; mov(eax, ptr [rip + label]); // eax = 4 @@ -243,92 +282,127 @@ int x; ... mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB ``` -### Code size -The default max code size is 4096 bytes. Please set it in constructor of CodeGenerator() if you want to use large size. - class Quantize : public Xbyak::CodeGenerator { - public: - Quantize() - : CodeGenerator(8192) - { - } - ... - }; +## Code size +The default max code size is 4096 bytes. +Specify the size in constructor of `CodeGenerator()` if necessary. 
-### use user allocated memory +``` +class Quantize : public Xbyak::CodeGenerator { +public: + Quantize() + : CodeGenerator(8192) + { + } + ... +}; +``` + +## User allocated memory You can make jit code on prepaired memory. - class Sample : public Xbyak::CodeGenerator { - public: - Sample(void *userPtr, size_t size) - : Xbyak::CodeGenerator(size, userPtr) - { - ... - } - }; +Call `setProtectModeRE` yourself to change memory mode if using the prepaired memory. - const size_t codeSize = 1024; - uint8 buf[codeSize + 16]; - - // get 16-byte aligned address - uint8 *p = Xbyak::CodeArray::getAlignedAddress(buf); - - // append executable attribute to the memory - Xbyak::CodeArray::protect(p, codeSize, true); - - // construct your jit code on the memory - Sample s(p, codeSize); - ->See *sample/test0.cpp* - -AutoGrow -------------- - -Under `AutoGrow` mode, Xbyak extends memory automatically if necessary. -Call ready() before calling getCode() to calc address of jmp. ``` - struct Code : Xbyak::CodeGenerator { - Code() - : Xbyak::CodeGenerator(, Xbyak::AutoGrow) - { - ... - } - }; +uint8_t alignas(4096) buf[8192]; // C++11 or later + +struct Code : Xbyak::CodeGenerator { + Code() : Xbyak::CodeGenerator(sizeof(buf), buf) + { + mov(rax, 123); + ret(); + } +}; + +int main() +{ Code c; - c.ready(); // Don't forget to call this function + c.setProtectModeRE(); // set memory to Read/Exec + printf("%d\n", c.getCode()()); +} ``` ->Don't use the address returned by getCurr() before calling ready(). ->It may be invalid address. ->RESTRICTION : rip addressing is not supported in AutoGrow -Macro -------------- +**Note**: See [sample/test0.cpp](sample/test0.cpp). + +### AutoGrow + +The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`. + +Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address. 
+``` +struct Code : Xbyak::CodeGenerator { + Code() + : Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow) + { + ... + } +}; +Code c; +// generate code for jit +c.ready(); // mode = Read/Write/Exec +``` + +**Note**: +* Don't use the address returned by `getCurr()` before calling `ready()` because it may be an invalid address. + +### Read/Exec mode +Xbyak sets Read/Write/Exec mode to memory to run jit code. +If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and +call `setProtectModeRE()` after generating jit code. + +``` +struct Code : Xbyak::CodeGenerator { + Code() + : Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE) + { + mov(eax, 123); + ret(); + } +}; + +Code c; +c.setProtectModeRE(); +... + +``` +Call `readyRE()` instead of `ready()` when using `AutoGrow` mode. +See [protect-re.cpp](sample/protect-re.cpp). + +## Macro * **XBYAK32** is defined on 32bit. * **XBYAK64** is defined on 64bit. * **XBYAK64_WIN** is defined on 64bit Windows(VC) * **XBYAK64_GCC** is defined on 64bit gcc, cygwin * define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names` -* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(duplicated in the future) +* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future) * define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro -Sample -------------- +## Sample -* test0.cpp ; tiny sample of Xbyak(x86, x64) -* quantize.cpp ; JIT optimized quantization by fast division(x86 only) -* calc.cpp ; assemble and estimate a given polynomial(x86, x64) -* bf.cpp ; JIT brainfuck(x86, x64) +* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64) +* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only) +* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64) +* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64) -License
-------------- +## License modified new BSD License http://opensource.org/licenses/BSD-3-Clause -History -------------- +## History +* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov +* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel +* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility +* 2018/Oct/29 ver 5.75 unlink Label when LabelManager is destroyed +* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed +* 2018/Oct/15 util::StackFrame uses push/pop instead of mov +* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8) +* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) +* 2018/Sep/04 ver 5.71 L() returns a new label instance +* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting +* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday) * 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm * 2018/Jul/26 ver 5.661 support mingw64 * 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect() @@ -392,8 +466,7 @@ History * 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64) * 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class * 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label -* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). - support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest). +* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions * 2013/Mar/27 ver 3.80 support mov(reg, "label"); * 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz() @@ -453,8 +526,6 @@ History * 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp]) * 2007/Jan/4 first version -Author -------------- - +## Author MITSUNARI Shigeo(herumi@nifty.com) diff --git a/readme.txt b/readme.txt index 74eb5912..b5c02fce 100644 --- a/readme.txt +++ b/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.67 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.77 ----------------------------------------------------------------------------- ◎概要 @@ -245,8 +245,8 @@ void func2() 更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。 - Label label1, label2; - L(label1); + Label label2; + Label label1 = L(); // Label label1; L(label1);と同じ意味 ... jmp(label2); ... @@ -309,6 +309,41 @@ bool CodeArray::protect(const void *addr, size_t size, bool canExec); */ uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE); +・read/execモード +デフォルトのCodeGeneratorはコンストラクト時にJIT用の領域をread/write/execモードに設定して利用します。 +コード生成時はread/writeでコード実行時にはread/execにしたい場合、次のようにしてください。 + +struct Code : Xbyak::CodeGenerator { + Code() + : Xbyak::CodeGenerator(4096, Xbyak::DontUseProtect) // JIT領域をread/writeのままコード生成 + { + mov(eax, 123); + ret(); + } +}; + +Code c; +c.setProtectModeRE(); // read/execモードに変更 +// JIT領域を実行 + +AutoGrowの場合はreadyの代わりにreadyRE()を呼んでください。 + +struct Code : Xbyak::CodeGenerator { + Code() + : Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeのままコード生成 + { + mov(eax, 123); + ret(); + } +}; + +Code c; +c.readyRE(); // read/execモードに変更 +// JIT領域を実行 + +setProtectModeRW()を呼ぶと領域が元のread/writeモードに戻ります。 + + その他詳細は各種サンプルを参照してください。 ----------------------------------------------------------------------------- ◎マクロ @@ -338,6 +373,17 @@ sample/{echo,hello}.bfは
http://www.kmonos.net/alang/etc/brainfuck.php から ----------------------------------------------------------------------------- ◎履歴 +2019/03/06 ver 5.77 LLCキャッシュを共有数CPU数の修整(by densamoilov) +2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel) +2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元 +2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す +2018/10/21 ver 5.74 RegRip +/intの形をサポート Xbyak::CastToを削除 +2018/10/15 util::StackFrameでmovの代わりにpush/popを使う +2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整 +2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) +2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加 +2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加 +2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday) 2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm 2018/07/26 ver 5.661 mingw64対応 2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加 diff --git a/sample/bf.cpp b/sample/bf.cpp index ce5c12e0..20a0fd96 100644 --- a/sample/bf.cpp +++ b/sample/bf.cpp @@ -10,12 +10,6 @@ #endif class Brainfuck : public Xbyak::CodeGenerator { -private: - enum Direction { B, F }; - std::string toStr(int labelNo, Direction dir) - { - return Xbyak::Label::toStr(labelNo) + (dir == B ? 'B' : 'F'); - } public: int getContinuousChar(std::istream& is, char c) { @@ -67,8 +61,7 @@ public: mov(pGetchar, rsi); // getchar mov(stack, rdx); // stack #endif - int labelNo = 0; - std::stack keepLabelNo; + std::stack