externals: Update oaknut to 1.1.3

Merge commit 'cb8abc3ae5a1fcd3d7b6ab73472cdf9093302631'
This commit is contained in:
Merry 2022-11-15 15:37:16 +00:00
commit dd36a52048
15 changed files with 2220 additions and 22 deletions

View file

@ -29,7 +29,7 @@ jobs:
-B ${{github.workspace}}/build -B ${{github.workspace}}/build
-H. -H.
-GNinja -GNinja
-DDYNARMIC_USE_BUNDLED_CATCH=ON -DOAKNUT_USE_BUNDLED_CATCH=ON
- name: Build - name: Build
working-directory: ${{github.workspace}}/build working-directory: ${{github.workspace}}/build

View file

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.8) cmake_minimum_required(VERSION 3.8)
project(oaknut LANGUAGES CXX VERSION 0.0.0) project(oaknut LANGUAGES CXX VERSION 1.1.3)
# Determine if we're built as a subproject (using add_subdirectory) # Determine if we're built as a subproject (using add_subdirectory)
# or if this is the master project. # or if this is the master project.
@ -18,11 +18,13 @@ endif()
# Source project files # Source project files
set(header_files set(header_files
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_mnemonics.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/fpsimd_mnemonics.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.1.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/multi_typed_name.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/multi_typed_name.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/offset.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/offset.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp
@ -39,7 +41,8 @@ target_compile_features(oaknut INTERFACE cxx_std_20)
# Tests # Tests
if (MASTER_PROJECT) if (MASTER_PROJECT)
if (DYNARMIC_USE_BUNDLED_CATCH) option(OAKNUT_USE_BUNDLED_CATCH "Use the embedded Catch2 submodule" OFF)
if (OAKNUT_USE_BUNDLED_CATCH)
add_subdirectory(externals/catch) add_subdirectory(externals/catch)
else() else()
find_package(Catch2 3 REQUIRED) find_package(Catch2 3 REQUIRED)
@ -52,5 +55,33 @@ if (MASTER_PROJECT)
) )
target_include_directories(oaknut-tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/tests) target_include_directories(oaknut-tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/tests)
target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut) target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut)
if (MSVC)
target_compile_options(oaknut-tests PRIVATE
/experimental:external
/external:W0
/external:anglebrackets
/W4
/w44263 # Non-virtual member function hides base class virtual function
/w44265 # Class has virtual functions, but destructor is not virtual
/w44456 # Declaration of 'var' hides previous local declaration
/w44457 # Declaration of 'var' hides function parameter
/w44458 # Declaration of 'var' hides class member
/w44459 # Declaration of 'var' hides global definition
/w44946 # Reinterpret-cast between related types
/wd4592 # Symbol will be dynamically initialized (implementation limitation)
/permissive- # Stricter C++ standards conformance
/MP
/Zi
/Zo
/EHsc
/Zc:externConstexpr # Allows external linkage for variables declared "extern constexpr", as the standard permits.
/Zc:inline # Omits inline functions from object-file output.
/Zc:throwingNew # Assumes new (without std::nothrow) never returns null.
/volatile:iso # Use strict standard-abiding volatile semantics
/bigobj # Increase number of sections in .obj files
/DNOMINMAX
)
else()
target_compile_options(oaknut-tests PRIVATE -Wall -Wextra -Wcast-qual -pedantic -pedantic-errors -Wfatal-errors -Wno-missing-braces) target_compile_options(oaknut-tests PRIVATE -Wall -Wextra -Wcast-qual -pedantic -pedantic-errors -Wfatal-errors -Wno-missing-braces)
endif() endif()
endif()

View file

@ -1,14 +1,19 @@
# Oaknut # Oaknut
*A C++20 assembler for AArch64 (ARMv8.0)* *A C++20 assembler for AArch64 (ARMv8.0 to ARMv8.2)*
Oaknut is a header-only library that allows one to dynamically assemble code in-memory at runtime. Oaknut is a header-only library that allows one to dynamically assemble code in-memory at runtime.
## Usage ## Usage
Provide `oaknut::CodeGenerator` with a pointer to a block of memory. Call functions on it to emit code.
Simple example: Simple example:
```cpp ```cpp
#include <cstdio>
#include <oaknut/oaknut.hpp>
using EmittedFunction = int (*)(); using EmittedFunction = int (*)();
EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value) EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value)
@ -17,13 +22,92 @@ EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value)
EmittedFunction result = code.ptr<EmittedFunction>(); EmittedFunction result = code.ptr<EmittedFunction>();
code.MOVZ(W0, value); code.MOV(W0, value);
code.RET(); code.RET();
return result; return result;
} }
int main()
{
oaknut::CodeBlock mem{4096};
oaknut::CodeGenerator code{mem.ptr()};
mem.unprotect();
EmittedFunction fn = EmitExample(code, 42);
mem.protect();
mem.invalidate_all();
std::printf("%i\n", fn()); // Output: 42
return 0;
}
``` ```
### Instructions
Each AArch64 instruction corresponds to one emitter function. For a list of emitter functions see:
* ARMv8.0: [general instructions](include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp), [FP & SIMD instructions](include/oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp)
* ARMv8.1: [general instructions](include/oaknut/impl/mnemonics_generic_v8.1.inc.hpp), [FP & SIMD instructions](include/oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp)
* ARMv8.2: [general instructions](include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp), [FP & SIMD instructions](include/oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp)
### Operands
The `oaknut::util` namespace provides convenient names for operands for instructions. For example:
|Name|Class| |
|----|----|----|
|W0, W1, ..., W30|`WReg`|32-bit general purpose registers|
|X0, X1, ..., X30|`XReg`|64-bit general purpose registers|
|WZR|`WzrReg` (convertible to `WReg`)|32-bit zero register|
|XZR|`ZrReg` (convertible to `XReg`)|64-bit zero register|
|WSP|`WspReg` (convertible to `WRegSp`)|32-bit stack pointer|
|SP|`SpReg` (convertible to `XRegSp`)|64-bit stack pointer|
|B0, B1, ..., B31|`BReg`|8-bit scalar SIMD register|
|H0, H1, ..., H31|`HReg`|16-bit scalar SIMD register|
|S0, S1, ..., S31|`SReg`|32-bit scalar SIMD register|
|D0, D1, ..., D31|`DReg`|64-bit scalar SIMD register|
|Q0, Q1, ..., Q31|`QReg`|128-bit scalar SIMD register|
For vector operations, you can specify registers like so:
|Name|Class| |
|----|----|----|
|V0.B8(), ...|`VReg_8B`|8 elements each 8 bits in size|
|V0.B16(), ...|`VReg_16B`|16 elements each 8 bits in size|
|V0.H4(), ...|`VReg_4H`|4 elements each 16 bits in size|
|V0.H8(), ...|`VReg_8H`|8 elements each 16 bits in size|
|V0.S2(), ...|`VReg_2S`|2 elements each 32 bits in size|
|V0.S4(), ...|`VReg_4S`|4 elements each 32 bits in size|
|V0.D1(), ...|`VReg_1D`|1 element 64 bits in size|
|V0.D2(), ...|`VReg_2D`|2 elements each 64 bits in size|
And you can specify elements like so:
|Name|Class| |
|----|----|----|
|V0.B()[0]|`BElem`|0th 8-bit element of V0 register|
|V0.H()[0]|`HElem`|0th 16-bit element of V0 register|
|V0.S()[0]|`SElem`|0th 32-bit element of V0 register|
|V0.D()[0]|`DElem`|0th 64-bit element of V0 register|
Register lists are specified using `List`:
```
List{V0.B16(), V1.B16(), V2.B16()} // This expression has type List<VReg_16B, 3>
```
And lists of elements similarly (both forms are equivalent):
```
List{V0.B()[1], V1.B()[1], V2.B()[1]} // This expression has type List<BElem, 3>
List{V0.B(), V1.B(), V2.B()}[1] // This expression has type List<BElem, 3>
```
You can find examples of instruction use in [tests/general.cpp](tests/general.cpp) and [tests/fpsimd.cpp](tests/fpsimd.cpp).
## License ## License
This project is [MIT licensed](LICENSE). This project is [MIT licensed](LICENSE).

View file

@ -9,6 +9,7 @@
#include <new> #include <new>
#if defined(_WIN32) #if defined(_WIN32)
# define NOMINMAX
# include <windows.h> # include <windows.h>
#elif defined(__APPLE__) #elif defined(__APPLE__)
# include <libkern/OSCacheControl.h> # include <libkern/OSCacheControl.h>
@ -78,6 +79,8 @@ public:
{ {
#if defined(__APPLE__) #if defined(__APPLE__)
sys_icache_invalidate(mem, size); sys_icache_invalidate(mem, size);
#elif defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), mem, size);
#else #else
static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000; static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

View file

@ -4038,11 +4038,11 @@ void MOV(DReg rd, DElem en)
} }
void MOV(WReg wd, SElem en) void MOV(WReg wd, SElem en)
{ {
emit<"00001110000ii100001111nnnnnddddd", "d", "n", "x">(wd, en.reg_index(), en.elem_index()); emit<"00001110000xx100001111nnnnnddddd", "d", "n", "x">(wd, en.reg_index(), en.elem_index());
} }
void MOV(XReg xd, DElem en) void MOV(XReg xd, DElem en)
{ {
emit<"01001110000i1000001111nnnnnddddd", "d", "n", "x">(xd, en.reg_index(), en.elem_index()); emit<"01001110000x1000001111nnnnnddddd", "d", "n", "x">(xd, en.reg_index(), en.elem_index());
} }
void MOV(VReg_8B rd, VReg_8B rn) void MOV(VReg_8B rd, VReg_8B rn)
{ {

View file

@ -0,0 +1,111 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// SQRDMLAH emitters.
//
// By-element overloads taking an HElem: the pattern only has a 4-bit register
// field ("mmmm"), so element registers 16 and above are rejected; the element
// index is split into H (bit 2), L (bit 1) and M (bit 0).
// By-element overloads taking an SElem: the register number fills the combined
// 5-bit "Mm" field, and the element index is split into H (bit 1) and L (bit 0).
void SQRDMLAH(HReg rd, HReg rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0111111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void SQRDMLAH(SReg rd, SReg rn, SElem em)
{
    const auto lane = em.elem_index();
    emit<"0111111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), lane >> 1, lane & 1);
}
void SQRDMLAH(VReg_4H rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0010111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void SQRDMLAH(VReg_8H rd, VReg_8H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0110111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void SQRDMLAH(VReg_2S rd, VReg_2S rn, SElem em)
{
    const auto lane = em.elem_index();
    emit<"0010111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), lane >> 1, lane & 1);
}
void SQRDMLAH(VReg_4S rd, VReg_4S rn, SElem em)
{
    const auto lane = em.elem_index();
    emit<"0110111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), lane >> 1, lane & 1);
}

// Register-register overloads: all three operands share one register class and
// fill the d, n and m fields directly.
void SQRDMLAH(HReg rd, HReg rn, HReg rm)
{
    emit<"01111110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(SReg rd, SReg rn, SReg rm)
{
    emit<"01111110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(VReg_2S rd, VReg_2S rn, VReg_2S rm)
{
    emit<"00101110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
    emit<"01101110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SQRDMLSH emitters.
//
// By-element overloads taking an HElem: the pattern only has a 4-bit register
// field ("mmmm"), so element registers 16 and above are rejected; the element
// index is split into H (bit 2), L (bit 1) and M (bit 0).
// By-element overloads taking an SElem: the register number fills the combined
// 5-bit "Mm" field, and the element index is split into H (bit 1) and L (bit 0).
void SQRDMLSH(HReg rd, HReg rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0111111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void SQRDMLSH(SReg rd, SReg rn, SElem em)
{
    const auto lane = em.elem_index();
    emit<"0111111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), lane >> 1, lane & 1);
}
void SQRDMLSH(VReg_4H rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0010111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void SQRDMLSH(VReg_8H rd, VReg_8H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0110111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void SQRDMLSH(VReg_2S rd, VReg_2S rn, SElem em)
{
    const auto lane = em.elem_index();
    emit<"0010111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), lane >> 1, lane & 1);
}
void SQRDMLSH(VReg_4S rd, VReg_4S rn, SElem em)
{
    const auto lane = em.elem_index();
    emit<"0110111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), lane >> 1, lane & 1);
}

// Register-register overloads: all three operands share one register class and
// fill the d, n and m fields directly.
void SQRDMLSH(HReg rd, HReg rn, HReg rm)
{
    emit<"01111110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(SReg rd, SReg rn, SReg rm)
{
    emit<"01111110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(VReg_2S rd, VReg_2S rn, VReg_2S rm)
{
    emit<"00101110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
    emit<"01101110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}

View file

@ -0,0 +1,855 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// BCAX: four 16B vector operands fill the d, n, m and a register fields.
void BCAX(VReg_16B rd, VReg_16B rn, VReg_16B rm, VReg_16B ra)
{
    emit<"11001110001mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
}
// EOR3: four 16B vector operands fill the d, n, m and a register fields.
void EOR3(VReg_16B rd, VReg_16B rn, VReg_16B rm, VReg_16B ra)
{
    emit<"11001110000mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
}
// FABD, half-precision forms: scalar H register, then 4H and 8H vectors.
void FABD(HReg rd, HReg rn, HReg rm)
{
    emit<"01111110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FABD(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FABD(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FABS, half-precision vector forms (4H and 8H).
void FABS(VReg_4H rd, VReg_4H rn)
{
    emit<"0000111011111000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FABS(VReg_8H rd, VReg_8H rn)
{
    emit<"0100111011111000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FACGE, half-precision forms: scalar H register, then 4H and 8H vectors.
void FACGE(HReg rd, HReg rn, HReg rm)
{
    emit<"01111110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FACGE(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FACGE(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FACGT, half-precision forms: scalar H register, then 4H and 8H vectors.
void FACGT(HReg rd, HReg rn, HReg rm)
{
    emit<"01111110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FACGT(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FACGT(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FADD, half-precision vector forms (4H and 8H).
void FADD(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FADD(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FADDP, half-precision forms.
// The first overload takes a 2H source and writes a scalar H register; the
// other two take three vectors of the same arrangement.
void FADDP(HReg rd, VReg_2H rn)
{
    emit<"0101111000110000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FADDP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FADDP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FCMEQ, half-precision forms.
// Register-register comparisons come first; the ImmConstFZero overloads use
// the argument only as a tag to select the pattern that has no m field.
void FCMEQ(HReg rd, HReg rn, HReg rm)
{
    emit<"01011110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMEQ(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMEQ(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMEQ(HReg rd, HReg rn, ImmConstFZero)
{
    emit<"0101111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FCMEQ(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
    emit<"0000111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FCMEQ(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
    emit<"0100111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
// FCMGE, half-precision forms.
// Register-register comparisons come first; the ImmConstFZero overloads use
// the argument only as a tag to select the pattern that has no m field.
void FCMGE(HReg rd, HReg rn, HReg rm)
{
    emit<"01111110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGE(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGE(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGE(HReg rd, HReg rn, ImmConstFZero)
{
    emit<"0111111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMGE(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
    emit<"0010111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMGE(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
    emit<"0110111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
// FCMGT, half-precision forms.
// Register-register comparisons come first; the ImmConstFZero overloads use
// the argument only as a tag to select the pattern that has no m field.
void FCMGT(HReg rd, HReg rn, HReg rm)
{
    emit<"01111110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGT(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGT(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGT(HReg rd, HReg rn, ImmConstFZero)
{
    emit<"0101111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMGT(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
    emit<"0000111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMGT(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
    emit<"0100111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
// FCMLE, half-precision forms. Only overloads tagged with ImmConstFZero
// appear here; the tag argument itself is not encoded.
void FCMLE(HReg rd, HReg rn, ImmConstFZero)
{
    emit<"0111111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FCMLE(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
    emit<"0010111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FCMLE(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
    emit<"0110111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
// FCMLT, half-precision forms. Only overloads tagged with ImmConstFZero
// appear here; the tag argument itself is not encoded.
void FCMLT(HReg rd, HReg rn, ImmConstFZero)
{
    emit<"0101111011111000111010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMLT(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
    emit<"0000111011111000111010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMLT(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
    emit<"0100111011111000111010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTAS, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTAS(HReg rd, HReg rn)
{
    emit<"0101111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTAS(VReg_4H rd, VReg_4H rn)
{
    emit<"0000111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTAS(VReg_8H rd, VReg_8H rn)
{
    emit<"0100111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTAU, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTAU(HReg rd, HReg rn)
{
    emit<"0111111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTAU(VReg_4H rd, VReg_4H rn)
{
    emit<"0010111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTAU(VReg_8H rd, VReg_8H rn)
{
    emit<"0110111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTMS, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTMS(HReg rd, HReg rn)
{
    emit<"0101111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTMS(VReg_4H rd, VReg_4H rn)
{
    emit<"0000111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTMS(VReg_8H rd, VReg_8H rn)
{
    emit<"0100111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTMU, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTMU(HReg rd, HReg rn)
{
    emit<"0111111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTMU(VReg_4H rd, VReg_4H rn)
{
    emit<"0010111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTMU(VReg_8H rd, VReg_8H rn)
{
    emit<"0110111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTNS, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTNS(HReg rd, HReg rn)
{
    emit<"0101111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTNS(VReg_4H rd, VReg_4H rn)
{
    emit<"0000111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTNS(VReg_8H rd, VReg_8H rn)
{
    emit<"0100111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTNU, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTNU(HReg rd, HReg rn)
{
    emit<"0111111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTNU(VReg_4H rd, VReg_4H rn)
{
    emit<"0010111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTNU(VReg_8H rd, VReg_8H rn)
{
    emit<"0110111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTPS, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTPS(HReg rd, HReg rn)
{
    emit<"0101111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTPS(VReg_4H rd, VReg_4H rn)
{
    emit<"0000111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTPS(VReg_8H rd, VReg_8H rn)
{
    emit<"0100111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTPU, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTPU(HReg rd, HReg rn)
{
    emit<"0111111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTPU(VReg_4H rd, VReg_4H rn)
{
    emit<"0010111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTPU(VReg_8H rd, VReg_8H rn)
{
    emit<"0110111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTZS, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTZS(HReg rd, HReg rn)
{
    emit<"0101111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTZS(VReg_4H rd, VReg_4H rn)
{
    emit<"0000111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTZS(VReg_8H rd, VReg_8H rn)
{
    emit<"0100111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTZU, half-precision forms: scalar H register, then 4H and 8H vectors.
void FCVTZU(HReg rd, HReg rn)
{
    emit<"0111111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTZU(VReg_4H rd, VReg_4H rn)
{
    emit<"0010111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTZU(VReg_8H rd, VReg_8H rn)
{
    emit<"0110111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
// FDIV, half-precision vector forms (4H and 8H).
void FDIV(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FDIV(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAX, half-precision vector forms (4H and 8H).
void FMAX(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMAX(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAXNM, half-precision vector forms (4H and 8H).
void FMAXNM(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMAXNM(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAXNMP, half-precision forms.
// The first overload takes a 2H source and writes a scalar H register; the
// other two take three vectors of the same arrangement.
void FMAXNMP(HReg rd, VReg_2H rn)
{
    emit<"0101111000110000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FMAXNMP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMAXNMP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAXNMV, half-precision forms: a 4H or 8H source vector and a scalar H
// destination.
void FMAXNMV(HReg rd, VReg_4H rn)
{
    emit<"0000111000110000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FMAXNMV(HReg rd, VReg_8H rn)
{
    emit<"0100111000110000110010nnnnnddddd", "d", "n">(rd, rn);
}
// FMAXP, half-precision forms.
// The first overload takes a 2H source and writes a scalar H register; the
// other two take three vectors of the same arrangement.
void FMAXP(HReg rd, VReg_2H rn)
{
    emit<"0101111000110000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FMAXP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMAXP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAXV, half-precision forms: a 4H or 8H source vector and a scalar H
// destination.
void FMAXV(HReg rd, VReg_4H rn)
{
    emit<"0000111000110000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FMAXV(HReg rd, VReg_8H rn)
{
    emit<"0100111000110000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FMIN, half-precision vector forms (4H and 8H).
void FMIN(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMIN(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMINNM, half-precision vector forms (4H and 8H).
void FMINNM(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMINNM(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMINNMP, half-precision forms.
// The first overload takes a 2H source and writes a scalar H register; the
// other two take three vectors of the same arrangement.
void FMINNMP(HReg rd, VReg_2H rn)
{
    emit<"0101111010110000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FMINNMP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMINNMP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMINNMV, half-precision forms: a 4H or 8H source vector and a scalar H
// destination.
void FMINNMV(HReg rd, VReg_4H rn)
{
    emit<"0000111010110000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FMINNMV(HReg rd, VReg_8H rn)
{
    emit<"0100111010110000110010nnnnnddddd", "d", "n">(rd, rn);
}
// FMINP, half-precision forms.
// The first overload takes a 2H source and writes a scalar H register; the
// other two take three vectors of the same arrangement.
void FMINP(HReg rd, VReg_2H rn)
{
    emit<"0101111010110000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FMINP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMINP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMINV, half-precision forms: a 4H or 8H source vector and a scalar H
// destination.
void FMINV(HReg rd, VReg_4H rn)
{
    emit<"0000111010110000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FMINV(HReg rd, VReg_8H rn)
{
    emit<"0100111010110000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FMLA, half-precision forms.
//
// By-element overloads: the pattern only has a 4-bit register field ("mmmm"),
// so element registers 16 and above are rejected with "InvalidCombination".
// The element index is split into H (bit 2), L (bit 1) and M (bit 0).
void FMLA(HReg rd, HReg rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0101111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
// The vector by-element encodings operate on 16-bit lanes (4H / 8H per the
// A64 instruction description), so these are the correctly typed overloads.
void FMLA(VReg_4H rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0000111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLA(VReg_8H rd, VReg_8H rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0100111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
// Legacy signatures typed on 8B / 16B. They produce the same encodings as the
// 4H / 8H overloads above and are kept only so existing callers still compile.
void FMLA(VReg_8B rd, VReg_8B rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0000111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLA(VReg_16B rd, VReg_16B rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0100111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}

// Vector forms: three registers of the same arrangement.
void FMLA(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110010mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLA(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110010mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMLAL / FMLAL2 emitters. The destination uses S-sized lanes and the sources
// use H-sized lanes with the same lane count.
//
// By-element overloads: the pattern only has a 4-bit register field ("mmmm"),
// so element registers 16 and above are rejected; the element index is split
// into H (bit 2), L (bit 1) and M (bit 0).
void FMLAL(VReg_2S rd, VReg_2H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0000111110LMmmmm0000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLAL(VReg_4S rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0100111110LMmmmm0000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLAL2(VReg_2S rd, VReg_2H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0010111110LMmmmm1000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLAL2(VReg_4S rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0110111110LMmmmm1000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}

// Vector forms: both sources are whole vectors.
void FMLAL(VReg_2S rd, VReg_2H rn, VReg_2H rm)
{
    emit<"00001110001mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLAL(VReg_4S rd, VReg_4H rn, VReg_4H rm)
{
    emit<"01001110001mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLAL2(VReg_2S rd, VReg_2H rn, VReg_2H rm)
{
    emit<"00101110001mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLAL2(VReg_4S rd, VReg_4H rn, VReg_4H rm)
{
    emit<"01101110001mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMLS, half-precision forms.
//
// By-element overloads: the pattern only has a 4-bit register field ("mmmm"),
// so element registers 16 and above are rejected with "InvalidCombination".
// The element index is split into H (bit 2), L (bit 1) and M (bit 0).
void FMLS(HReg rd, HReg rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0101111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
// The vector by-element encodings operate on 16-bit lanes (4H / 8H per the
// A64 instruction description), so these are the correctly typed overloads.
void FMLS(VReg_4H rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0000111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLS(VReg_8H rd, VReg_8H rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0100111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
// Legacy signatures typed on 8B / 16B. They produce the same encodings as the
// 4H / 8H overloads above and are kept only so existing callers still compile.
void FMLS(VReg_8B rd, VReg_8B rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0000111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLS(VReg_16B rd, VReg_16B rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0100111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}

// Vector forms: three registers of the same arrangement.
void FMLS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110110mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110110mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMLSL / FMLSL2 emitters. The destination uses S-sized lanes and the sources
// use H-sized lanes with the same lane count.
//
// By-element overloads: the pattern only has a 4-bit register field ("mmmm"),
// so element registers 16 and above are rejected; the element index is split
// into H (bit 2), L (bit 1) and M (bit 0).
void FMLSL(VReg_2S rd, VReg_2H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0000111110LMmmmm0100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLSL(VReg_4S rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0100111110LMmmmm0100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLSL2(VReg_2S rd, VReg_2H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0010111110LMmmmm1100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMLSL2(VReg_4S rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16) {
        throw "InvalidCombination";
    }
    const auto lane = em.elem_index();
    emit<"0110111110LMmmmm1100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}

// Vector forms: both sources are whole vectors.
void FMLSL(VReg_2S rd, VReg_2H rn, VReg_2H rm)
{
    emit<"00001110101mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLSL(VReg_4S rd, VReg_4H rn, VReg_4H rm)
{
    emit<"01001110101mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLSL2(VReg_2S rd, VReg_2H rn, VReg_2H rm)
{
    emit<"00101110101mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLSL2(VReg_4S rd, VReg_4H rn, VReg_4H rm)
{
    emit<"01101110101mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMOV (vector, immediate), half-precision forms. The FImm8 value fills the
// eight "v" bits, which are split 3 + 5 across the pattern.
void FMOV(VReg_4H rd, FImm8 imm)
{
    emit<"0000111100000vvv111111vvvvvddddd", "d", "v">(rd, imm);
}
void FMOV(VReg_8H rd, FImm8 imm)
{
    emit<"0100111100000vvv111111vvvvvddddd", "d", "v">(rd, imm);
}
// FMUL, half-precision forms.
//
// By-element overloads: the pattern only has a 4-bit register field ("mmmm"),
// so element registers 16 and above are rejected with "InvalidCombination".
// The element index is split into H (bit 2), L (bit 1) and M (bit 0).
void FMUL(HReg rd, HReg rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0101111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
// The vector by-element encodings operate on 16-bit lanes (4H / 8H per the
// A64 instruction description), so these are the correctly typed overloads.
void FMUL(VReg_4H rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0000111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMUL(VReg_8H rd, VReg_8H rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0100111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
// Legacy signatures typed on 8B / 16B. They produce the same encodings as the
// 4H / 8H overloads above and are kept only so existing callers still compile.
void FMUL(VReg_8B rd, VReg_8B rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0000111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}
void FMUL(VReg_16B rd, VReg_16B rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw "InvalidCombination";
    const auto lane = em.elem_index();
    emit<"0100111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), lane >> 2, (lane >> 1) & 1, lane & 1);
}

// Vector forms: three registers of the same arrangement.
void FMUL(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00101110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMUL(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01101110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMULX(HReg rd, HReg rn, HReg rm)
{
emit<"01011110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMULX (vector), 4H arrangement; rd, rn and rm fill the d, n and m fields.
void FMULX(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMULX (vector), 8H arrangement; rd, rn and rm fill the d, n and m fields.
void FMULX(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMULX (by element), scalar half-precision form.
// Lane index bits 2, 1 and 0 are placed in the H, L and M fields; the register
// number goes in the 4-bit m field, so registers 16 and above are rejected.
void FMULX(HReg rd, HReg rn, HElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    if (!(reg < 16)) {
        throw "InvalidCombination";
    }
    const auto h_bit = lane >> 2;
    const auto l_bit = (lane >> 1) & 1;
    const auto m_bit = lane & 1;
    emit<"0111111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, reg, h_bit, l_bit, m_bit);
}
// FMULX (by element), 64-bit vector form with a half-precision element operand.
// Lane index bits 2, 1 and 0 are placed in the H, L and M fields; the register
// number goes in the 4-bit m field, so registers 16 and above are rejected.
// NOTE(review): rd/rn are typed VReg_8B while the sibling vector overload uses
// VReg_4H — confirm the intended arrangement type.
void FMULX(VReg_8B rd, VReg_8B rn, HElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    if (!(reg < 16)) {
        throw "InvalidCombination";
    }
    const auto h_bit = lane >> 2;
    const auto l_bit = (lane >> 1) & 1;
    const auto m_bit = lane & 1;
    emit<"0010111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, reg, h_bit, l_bit, m_bit);
}
// FMULX (by element), 128-bit vector form with a half-precision element operand.
// Lane index bits 2, 1 and 0 are placed in the H, L and M fields; the register
// number goes in the 4-bit m field, so registers 16 and above are rejected.
// NOTE(review): rd/rn are typed VReg_16B while the sibling vector overload uses
// VReg_8H — confirm the intended arrangement type.
void FMULX(VReg_16B rd, VReg_16B rn, HElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    if (!(reg < 16)) {
        throw "InvalidCombination";
    }
    const auto h_bit = lane >> 2;
    const auto l_bit = (lane >> 1) & 1;
    const auto m_bit = lane & 1;
    emit<"0110111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, reg, h_bit, l_bit, m_bit);
}
// FNEG (vector), 4H arrangement; rd and rn fill the d and n fields.
void FNEG(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FNEG (vector), 8H arrangement; rd and rn fill the d and n fields.
void FNEG(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FRECPE, scalar half-precision form; rd and rn fill the d and n fields.
void FRECPE(HReg rd, HReg rn)
{
emit<"0101111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// FRECPE (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRECPE(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// FRECPE (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRECPE(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// FRECPS, scalar half-precision form; rd, rn and rm fill the d, n and m fields.
void FRECPS(HReg rd, HReg rn, HReg rm)
{
emit<"01011110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FRECPS (vector), 4H arrangement; rd, rn and rm fill the d, n and m fields.
void FRECPS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FRECPS (vector), 8H arrangement; rd, rn and rm fill the d, n and m fields.
void FRECPS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FRECPX, scalar half-precision form (the only overload in this section); rd and rn fill the d and n fields.
void FRECPX(HReg rd, HReg rn)
{
emit<"0101111011111001111110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTA (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRINTA(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001100010nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTA (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRINTA(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001100010nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTI (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRINTI(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTI (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRINTI(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTM (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRINTM(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTM (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRINTM(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTN (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRINTN(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001100010nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTN (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRINTN(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001100010nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTP (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRINTP(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111001100010nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTP (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRINTP(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111001100010nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTX (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRINTX(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTX (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRINTX(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTZ (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRINTZ(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTZ (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRINTZ(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRSQRTE, scalar half-precision form; rd and rn fill the d and n fields.
void FRSQRTE(HReg rd, HReg rn)
{
emit<"0111111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// FRSQRTE (vector), 4H arrangement; rd and rn fill the d and n fields.
void FRSQRTE(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// FRSQRTE (vector), 8H arrangement; rd and rn fill the d and n fields.
void FRSQRTE(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// FRSQRTS, scalar half-precision form; rd, rn and rm fill the d, n and m fields.
void FRSQRTS(HReg rd, HReg rn, HReg rm)
{
emit<"01011110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FRSQRTS (vector), 4H arrangement; rd, rn and rm fill the d, n and m fields.
void FRSQRTS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FRSQRTS (vector), 8H arrangement; rd, rn and rm fill the d, n and m fields.
void FRSQRTS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FSQRT (vector), 4H arrangement; rd and rn fill the d and n fields.
void FSQRT(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111001111110nnnnnddddd", "d", "n">(rd, rn);
}
// FSQRT (vector), 8H arrangement; rd and rn fill the d and n fields.
void FSQRT(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111001111110nnnnnddddd", "d", "n">(rd, rn);
}
// FSUB (vector), 4H arrangement; rd, rn and rm fill the d, n and m fields.
void FSUB(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FSUB (vector), 8H arrangement; rd, rn and rm fill the d, n and m fields.
void FSUB(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// RAX1 on 2D operands; rd, rn and rm fill the d, n and m fields.
void RAX1(VReg_2D rd, VReg_2D rn, VReg_2D rm)
{
emit<"11001110011mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SCVTF, scalar half-precision register-to-register form; rd and rn fill the d and n fields.
void SCVTF(HReg rd, HReg rn)
{
emit<"0101111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// SCVTF (vector), 4H arrangement; rd and rn fill the d and n fields.
void SCVTF(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// SCVTF (vector), 8H arrangement; rd and rn fill the d and n fields.
void SCVTF(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// SDOT (by element): 2S destination, 8B source, S-sized element operand.
// The register number fills the combined "Mm" field; lane index bit 1 goes to H
// and bit 0 goes to L.
void SDOT(VReg_2S rd, VReg_8B rn, SElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    const auto h_bit = lane >> 1;
    const auto l_bit = lane & 1;
    emit<"0000111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, reg, h_bit, l_bit);
}
// SDOT (by element): 4S destination, 16B source, S-sized element operand.
// The register number fills the combined "Mm" field; lane index bit 1 goes to H
// and bit 0 goes to L.
void SDOT(VReg_4S rd, VReg_16B rn, SElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    const auto h_bit = lane >> 1;
    const auto l_bit = lane & 1;
    emit<"0100111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, reg, h_bit, l_bit);
}
// SDOT (vector): 2S destination with 8B sources; rd, rn and rm fill the d, n and m fields.
void SDOT(VReg_2S rd, VReg_8B rn, VReg_8B rm)
{
emit<"00001110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SDOT (vector): 4S destination with 16B sources; rd, rn and rm fill the d, n and m fields.
void SDOT(VReg_4S rd, VReg_16B rn, VReg_16B rm)
{
emit<"01001110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SHA512H: Q-register rd and rn with a 2D rm; operands fill the d, n and m fields.
void SHA512H(QReg rd, QReg rn, VReg_2D rm)
{
emit<"11001110011mmmmm100000nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SHA512H2: Q-register rd and rn with a 2D rm; operands fill the d, n and m fields.
void SHA512H2(QReg rd, QReg rn, VReg_2D rm)
{
emit<"11001110011mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SHA512SU0 on 2D operands; rd and rn fill the d and n fields.
void SHA512SU0(VReg_2D rd, VReg_2D rn)
{
emit<"1100111011000000100000nnnnnddddd", "d", "n">(rd, rn);
}
// SHA512SU1 on 2D operands; rd, rn and rm fill the d, n and m fields.
void SHA512SU1(VReg_2D rd, VReg_2D rn, VReg_2D rm)
{
emit<"11001110011mmmmm100010nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SM3PARTW1 on 4S operands; rd, rn and rm fill the d, n and m fields.
void SM3PARTW1(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
emit<"11001110011mmmmm110000nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SM3PARTW2 on 4S operands; rd, rn and rm fill the d, n and m fields.
void SM3PARTW2(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
emit<"11001110011mmmmm110001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SM3SS1 on 4S operands; the fourth register ra fills the "a" field alongside d, n and m.
void SM3SS1(VReg_4S rd, VReg_4S rn, VReg_4S rm, VReg_4S ra)
{
emit<"11001110010mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
}
// SM3TT1A: 4S rd/rn with an S-sized element operand. The element's register
// number fills the m field and its lane index fills the two-bit "i" field.
void SM3TT1A(VReg_4S rd, VReg_4S rn, SElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    emit<"11001110010mmmmm10ii00nnnnnddddd", "d", "n", "m", "i">(rd, rn, reg, lane);
}
// SM3TT1B: 4S rd/rn with an S-sized element operand. The element's register
// number fills the m field and its lane index fills the two-bit "i" field.
void SM3TT1B(VReg_4S rd, VReg_4S rn, SElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    emit<"11001110010mmmmm10ii01nnnnnddddd", "d", "n", "m", "i">(rd, rn, reg, lane);
}
// SM3TT2A: 4S rd/rn with an S-sized element operand. The element's register
// number fills the m field and its lane index fills the two-bit "i" field.
void SM3TT2A(VReg_4S rd, VReg_4S rn, SElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    emit<"11001110010mmmmm10ii10nnnnnddddd", "d", "n", "m", "i">(rd, rn, reg, lane);
}
// SM3TT2B: 4S rd/rn with an S-sized element operand. The element's register
// number fills the m field and its lane index fills the two-bit "i" field.
void SM3TT2B(VReg_4S rd, VReg_4S rn, SElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    emit<"11001110010mmmmm10ii11nnnnnddddd", "d", "n", "m", "i">(rd, rn, reg, lane);
}
// SM4E on 4S operands; rd and rn fill the d and n fields.
void SM4E(VReg_4S rd, VReg_4S rn)
{
emit<"1100111011000000100001nnnnnddddd", "d", "n">(rd, rn);
}
// SM4EKEY on 4S operands; rd, rn and rm fill the d, n and m fields.
void SM4EKEY(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
emit<"11001110011mmmmm110010nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// UCVTF, scalar half-precision register-to-register form; rd and rn fill the d and n fields.
void UCVTF(HReg rd, HReg rn)
{
emit<"0111111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// UCVTF (vector), 4H arrangement; rd and rn fill the d and n fields.
void UCVTF(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// UCVTF (vector), 8H arrangement; rd and rn fill the d and n fields.
void UCVTF(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// UDOT (by element): 2S destination, 8B source, S-sized element operand.
// The register number fills the combined "Mm" field; lane index bit 1 goes to H
// and bit 0 goes to L.
void UDOT(VReg_2S rd, VReg_8B rn, SElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    const auto h_bit = lane >> 1;
    const auto l_bit = lane & 1;
    emit<"0010111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, reg, h_bit, l_bit);
}
// UDOT (by element): 4S destination, 16B source, S-sized element operand.
// The register number fills the combined "Mm" field; lane index bit 1 goes to H
// and bit 0 goes to L.
void UDOT(VReg_4S rd, VReg_16B rn, SElem em)
{
    const auto reg = em.reg_index();
    const auto lane = em.elem_index();
    const auto h_bit = lane >> 1;
    const auto l_bit = lane & 1;
    emit<"0110111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, reg, h_bit, l_bit);
}
// UDOT (vector): 2S destination with 8B sources; rd, rn and rm fill the d, n and m fields.
void UDOT(VReg_2S rd, VReg_8B rn, VReg_8B rm)
{
emit<"00101110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// UDOT (vector): 4S destination with 16B sources; rd, rn and rm fill the d, n and m fields.
void UDOT(VReg_4S rd, VReg_16B rn, VReg_16B rm)
{
emit<"01101110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// XAR on 2D operands; the 6-bit rotate_amount fills the "i" field alongside d, n and m.
void XAR(VReg_2D rd, VReg_2D rn, VReg_2D rm, Imm<6> rotate_amount)
{
emit<"11001110100mmmmmiiiiiinnnnnddddd", "d", "n", "m", "i">(rd, rn, rm, rotate_amount);
}

View file

@ -87,11 +87,11 @@ void ADDS(XReg xd, XReg xn, XReg xm, AddSubShift shift = AddSubShift::LSL, Imm<6
{ {
emit<"10101011ss0mmmmmiiiiiinnnnnddddd", "d", "n", "m", "s", "i">(xd, xn, xm, shift, shift_amount); emit<"10101011ss0mmmmmiiiiiinnnnnddddd", "d", "n", "m", "s", "i">(xd, xn, xm, shift, shift_amount);
} }
void ADR(XReg xd, AddrOffset<21, 0> label) void ADR(XReg xd, PageOffset<21, 0> label)
{ {
emit<"0ii10000iiiiiiiiiiiiiiiiiiiddddd", "d", "i">(xd, label); emit<"0ii10000iiiiiiiiiiiiiiiiiiiddddd", "d", "i">(xd, label);
} }
void ADRP(XReg xd, PageOffset<21> label) void ADRP(XReg xd, PageOffset<21, 12> label)
{ {
emit<"1ii10000iiiiiiiiiiiiiiiiiiiddddd", "d", "i">(xd, label); emit<"1ii10000iiiiiiiiiiiiiiiiiiiddddd", "d", "i">(xd, label);
} }

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// BFC, 32-bit form: encodes a bitfield clear of `width` bits starting at bit `lsb`.
// Throws "invalid width" when width is zero or the field would extend past bit 31.
// The "r" field receives (-lsb) mod 32 and the "s" field receives width - 1.
void BFC(WReg wd, Imm<5> lsb, Imm<5> width)
{
    const auto first_bit = lsb.value();
    const auto bit_count = width.value();
    const bool fits = bit_count != 0 && bit_count <= (32 - first_bit);
    if (!fits) {
        throw "invalid width";
    }
    emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (-first_bit) & 31, bit_count - 1);
}
// BFC, 64-bit form: encodes a bitfield clear of `width` bits starting at bit `lsb`.
// Throws "invalid width" when width is zero or the field would extend past bit 63.
// The "r" field receives (-lsb) mod 64 and the "s" field receives width - 1.
void BFC(XReg xd, Imm<6> lsb, Imm<6> width)
{
    const auto first_bit = lsb.value();
    const auto bit_count = width.value();
    const bool fits = bit_count != 0 && bit_count <= (64 - first_bit);
    if (!fits) {
        throw "invalid width";
    }
    emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (-first_bit) & 63, bit_count - 1);
}
// ESB: emits a fixed 32-bit encoding with no operand fields.
void ESB()
{
emit<"11010101000000110010001000011111">();
}
// PSB: emits a fixed 32-bit encoding with no operand fields.
// It differs from the ESB pattern only in bit 5.
void PSB()
{
emit<"11010101000000110010001000111111">();
}

View file

@ -64,7 +64,7 @@ private:
std::variant<std::uint32_t, Label*, void*> m_payload; std::variant<std::uint32_t, Label*, void*> m_payload;
}; };
template<std::size_t bitsize> template<std::size_t bitsize, std::size_t shift_amount>
struct PageOffset { struct PageOffset {
PageOffset(void* ptr) PageOffset(void* ptr)
: m_payload(ptr) : m_payload(ptr)
@ -76,10 +76,11 @@ struct PageOffset {
static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target) static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target)
{ {
const std::int64_t page_diff = (static_cast<std::int64_t>(target) >> 12) - (static_cast<std::int64_t>(current_addr) >> 12); std::uint64_t diff = (static_cast<std::uint64_t>(target) >> shift_amount) - (static_cast<std::uint64_t>(current_addr) >> shift_amount);
if (detail::sign_extend<bitsize>(page_diff) != page_diff) if (detail::sign_extend<bitsize>(diff) != diff)
throw "out of range"; throw "out of range";
return static_cast<std::uint32_t>(page_diff & detail::mask_from_size(bitsize)); diff &= detail::mask_from_size(bitsize);
return static_cast<std::uint32_t>(((diff & 3) << (bitsize - 2)) | (diff >> 2));
} }
private: private:

View file

@ -28,6 +28,7 @@ struct HReg;
struct SReg; struct SReg;
struct DReg; struct DReg;
struct QReg; struct QReg;
struct VReg_2H;
struct VReg_8B; struct VReg_8B;
struct VReg_4H; struct VReg_4H;
struct VReg_2S; struct VReg_2S;
@ -185,10 +186,10 @@ struct VReg : public Reg {
}; };
struct VRegArranged : public Reg { struct VRegArranged : public Reg {
protected:
constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_) constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_)
: Reg(true, bitsize_, index_), m_esize(esize_) : Reg(true, bitsize_, index_), m_esize(esize_)
{ {
assert(bitsize_ == 64 || bitsize_ == 128);
assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two"); assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two");
assert(esize_ <= bitsize_); assert(esize_ <= bitsize_);
} }
@ -200,6 +201,15 @@ private:
int m_esize : 8; int m_esize : 8;
}; };
// Arranged vector register with two 16-bit elements: passes a total size of
// 32 bits and an element size of 32 / 2 to the VRegArranged base.
struct VReg_2H : public VRegArranged {
constexpr explicit VReg_2H(int reg_index_)
: VRegArranged(32, reg_index_, 32 / 2)
{}
// Every BasicCodeGenerator instantiation is a friend of this type.
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_8B : public VRegArranged { struct VReg_8B : public VRegArranged {
constexpr explicit VReg_8B(int reg_index_) constexpr explicit VReg_8B(int reg_index_)
: VRegArranged(64, reg_index_, 64 / 8) : VRegArranged(64, reg_index_, 64 / 8)
@ -445,6 +455,7 @@ struct VRegSelector {
constexpr ElemSelector<SElem> S() const { return ElemSelector<SElem>(index()); } constexpr ElemSelector<SElem> S() const { return ElemSelector<SElem>(index()); }
constexpr ElemSelector<DElem> D() const { return ElemSelector<DElem>(index()); } constexpr ElemSelector<DElem> D() const { return ElemSelector<DElem>(index()); }
constexpr VReg_2H H2() const { return VReg_2H{index()}; }
constexpr VReg_8B B8() const { return VReg_8B{index()}; } constexpr VReg_8B B8() const { return VReg_8B{index()}; }
constexpr VReg_4H H4() const { return VReg_4H{index()}; } constexpr VReg_4H H4() const { return VReg_4H{index()}; }
constexpr VReg_2S S2() const { return VReg_2S{index()}; } constexpr VReg_2S S2() const { return VReg_2S{index()}; }

View file

@ -98,8 +98,12 @@ public:
label.m_wbs.clear(); label.m_wbs.clear();
} }
#include "oaknut/impl/arm64_mnemonics.inc.hpp" #include "oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp"
#include "oaknut/impl/fpsimd_mnemonics.inc.hpp" #include "oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp"
#include "oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.0.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.1.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.2.inc.hpp"
void RET() void RET()
{ {
@ -220,13 +224,13 @@ private:
v.m_payload); v.m_payload);
} }
template<std::uint32_t splat, std::size_t size> template<std::uint32_t splat, std::size_t size, std::size_t shift_amount>
std::uint32_t encode(PageOffset<size> v) std::uint32_t encode(PageOffset<size, shift_amount> v)
{ {
static_assert(std::popcount(splat) == size); static_assert(std::popcount(splat) == size);
const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) { const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
return pdep<splat>(PageOffset<size>::encode(current_addr, target)); return pdep<splat>(PageOffset<size, shift_amount>::encode(current_addr, target));
}; };
return std::visit(detail::overloaded{ return std::visit(detail::overloaded{

View file

@ -115,3 +115,47 @@ TEST_CASE("Immediate generation (64-bit)")
REQUIRE(f() == value); REQUIRE(f() == value);
} }
} }
// Checks ADR for every byte offset in [-1048576, 1048576) relative to the start
// of the code buffer: the generated function must return the target address.
TEST_CASE("ADR")
{
CodeBlock mem{4096};
for (std::int64_t i = -1048576; i < 1048576; i++) {
// Target address is the code buffer start displaced by i bytes.
const std::intptr_t value = reinterpret_cast<std::intptr_t>(mem.ptr()) + i;
// A fresh generator per iteration rewrites the code from the start of the buffer.
CodeGenerator code{mem.ptr()};
auto f = code.ptr<std::intptr_t (*)()>();
// The buffer is unprotected only while the two instructions are written.
mem.unprotect();
code.ADR(X0, reinterpret_cast<void*>(value));
code.RET();
mem.protect();
mem.invalidate_all();
INFO(i);
REQUIRE(f() == value);
}
}
// Checks ADRP against 0x200000 randomly chosen targets within
// [-4294967296, 4294967295] bytes of the code buffer: the generated function
// must return the target address with its low 12 bits cleared.
TEST_CASE("ADRP")
{
    CodeBlock mem{4096};

    for (int i = 0; i < 0x200000; i++) {
        const std::int64_t diff = RandInt<std::int64_t>(-4294967296, 4294967295);
        const std::intptr_t value = reinterpret_cast<std::intptr_t>(mem.ptr()) + diff;
        // Expected result: the target address rounded down to a 4096-byte boundary.
        const std::uint64_t expect = static_cast<std::uint64_t>(value) & ~static_cast<std::uint64_t>(0xfff);

        // A fresh generator per iteration rewrites the code from the start of the buffer.
        CodeGenerator code{mem.ptr()};

        auto f = code.ptr<std::uint64_t (*)()>();
        // The buffer is unprotected only while the two instructions are written.
        mem.unprotect();
        code.ADRP(X0, reinterpret_cast<void*>(value));
        code.RET();
        mem.protect();
        mem.invalidate_all();

        INFO(i);
        // The offset is random, so the iteration index alone cannot reproduce a
        // failure; record the offset that was actually tested as well.
        INFO(diff);
        REQUIRE(f() == expect);
    }
}

View file

@ -432,6 +432,10 @@ T(0x6e135ec1, MOV(V1.B()[9], V22.B()[11]))
T(0x4e0f1da9, MOV(V9.B()[7], W13)) T(0x4e0f1da9, MOV(V9.B()[7], W13))
T(0x5e0e045d, MOV(H29, V2.H()[3])) T(0x5e0e045d, MOV(H29, V2.H()[3]))
T(0x0e043ca1, MOV(W1, V5.S()[0])) T(0x0e043ca1, MOV(W1, V5.S()[0]))
T(0x0e0c3da8, MOV(W8, V13.S()[1]))
T(0x0e143da8, MOV(W8, V13.S()[2]))
T(0x0e1c3da8, MOV(W8, V13.S()[3]))
T(0x4e183d03, MOV(X3, V8.D()[1]))
T(0x4e083df7, MOV(X23, V15.D()[0])) T(0x4e083df7, MOV(X23, V15.D()[0]))
// MOV // MOV
T(0x0f06e58e, MOVI(V14.B8(), 204)) T(0x0f06e58e, MOVI(V14.B8(), 204))