externals: Add oaknut

Merge commit '621367dce8abf82e3924679d72f4da0913cc1520' as 'externals/oaknut'
This commit is contained in:
Merry 2022-07-10 09:35:44 +01:00
commit 501d7ce602
24 changed files with 15088 additions and 0 deletions

View file

@ -28,6 +28,14 @@ if (NOT TARGET merry::mcl)
add_subdirectory(mcl)
endif()
# oaknut
if (NOT TARGET merry::oaknut)
if (ARCHITECTURE STREQUAL "arm64")
add_subdirectory(oaknut)
endif()
endif()
# robin-map
if (NOT TARGET tsl::robin_map)

3
externals/README.md vendored
View file

@ -5,6 +5,7 @@ This repository uses subtrees to manage some of its externals.
```
git remote add externals-fmt https://github.com/fmtlib/fmt.git --no-tags
git remote add externals-mcl https://github.com/merryhime/mcl.git --no-tags
git remote add externals-oaknut https://github.com/merryhime/oaknut.git --no-tags
git remote add externals-robin-map https://github.com/Tessil/robin-map.git --no-tags
git remote add externals-xbyak https://github.com/herumi/xbyak.git --no-tags
git remote add externals-zycore https://github.com/zyantific/zycore-c.git --no-tags
@ -18,12 +19,14 @@ Change `<ref>` to refer to the appropriate git reference.
```
git fetch externals-fmt
git fetch externals-mcl
git fetch externals-oaknut
git fetch externals-robin-map
git fetch externals-xbyak
git fetch externals-zycore
git fetch externals-zydis
git subtree pull --squash --prefix=externals/fmt externals-fmt <ref>
git subtree pull --squash --prefix=externals/mcl externals-mcl <ref>
git subtree pull --squash --prefix=externals/oaknut externals-oaknut <ref>
git subtree pull --squash --prefix=externals/robin-map externals-robin-map <ref>
git subtree pull --squash --prefix=externals/xbyak externals-xbyak <ref>
git subtree pull --squash --prefix=externals/zycore externals-zycore <ref>

218
externals/oaknut/.clang-format vendored Normal file
View file

@ -0,0 +1,218 @@
---
Language: Cpp
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: AlignAfterOperator
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
AttributeMacros:
- __capability
BinPackArguments: true
BinPackParameters: false
BitFieldColonSpacing: Both
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: true
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Custom
BreakBeforeConceptDeclarations: true
BreakBeforeTernaryOperators: true
BreakBeforeInheritanceComma: false
BreakConstructorInitializersBeforeComma: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
# EmptyLineAfterAccessModifier: Leave
EmptyLineBeforeAccessModifier: Always
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<mach/'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<windows.h>'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<([^\.])*>$'
Priority: 2
SortPriority: 0
CaseSensitive: false
- Regex: '^<.*\.'
Priority: 3
SortPriority: 0
CaseSensitive: false
- Regex: '.*'
Priority: 4
SortPriority: 0
CaseSensitive: false
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
# IndentAccessModifiers: false
IndentCaseBlocks: false
IndentCaseLabels: false
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentRequires: false
IndentWidth: 4
IndentWrappedFunctionNames: false
# InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
NamespaceMacros:
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
- ParseTestProto
- ParsePartialTestProto
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
# ShortNamespaceLines: 5
SortIncludes: true
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
# SpacesInLineCommentPrefix: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Latest
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 4
TypenameMacros:
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
- NS_SWIFT_NAME
- CF_SWIFT_NAME
- FCODE
- ICODE
...

View file

@ -0,0 +1,38 @@
on: [push, pull_request]
jobs:
test_on_ubuntu:
runs-on: ubuntu-latest
name: Build on ${{ matrix.distro }} ${{ matrix.arch }}
strategy:
matrix:
include:
- arch: aarch64
distro: ubuntu_latest
steps:
- uses: actions/checkout@v3
- uses: uraimo/run-on-arch-action@v2
name: Build and Test
id: build
with:
arch: ${{ matrix.arch }}
distro: ${{ matrix.distro }}
shell: /bin/bash
install: |
apt-get update -q -y
apt-get install -q -y make cmake g++ git
pushd /tmp
git clone https://github.com/catchorg/Catch2.git
cd Catch2
cmake -Bbuild -H. -DBUILD_TESTING=OFF
cmake --build build/ --target install
popd
run: |
cmake -Bbuild -H.
cmake --build build
./build/oaknut-tests

4
externals/oaknut/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
.DS_Store
a.out
work/
*build*/

57
externals/oaknut/CMakeLists.txt vendored Normal file
View file

@ -0,0 +1,57 @@
# The cxx_std_20 compile feature requested below is only known to CMake 3.12 and newer.
cmake_minimum_required(VERSION 3.12)
project(oaknut LANGUAGES CXX VERSION 0.0.0)
# Determine if we're built as a subproject (using add_subdirectory)
# or if this is the master project.
set(MASTER_PROJECT OFF)
if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(MASTER_PROJECT ON)
endif()
# Disable in-source builds
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
message(SEND_ERROR "In-source builds are not allowed.")
endif()
# Source project files
set(header_files
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_mnemonics.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/fpsimd_mnemonics.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/multi_typed_name.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/offset.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/string_literal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut.hpp
)
# Library definition
add_library(oaknut INTERFACE)
add_library(merry::oaknut ALIAS oaknut)
target_sources(oaknut INTERFACE "$<BUILD_INTERFACE:${header_files}>")
target_include_directories(oaknut INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_compile_features(oaknut INTERFACE cxx_std_20)
# Tests
if (MASTER_PROJECT)
find_package(Catch2 3 REQUIRED)
add_executable(oaknut-tests
tests/basic.cpp
tests/fpsimd.cpp
tests/general.cpp
)
target_include_directories(oaknut-tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/tests)
target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut)
target_compile_options(oaknut-tests PRIVATE -Wall -Wextra -Wcast-qual -pedantic -pedantic-errors -Wfatal-errors -Wno-missing-braces)
include(CTest)
include(Catch)
catch_discover_tests(oaknut-tests)
enable_testing()
endif()

21
externals/oaknut/LICENSE vendored Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 merryhime <https://mary.rs>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

29
externals/oaknut/README.md vendored Normal file
View file

@ -0,0 +1,29 @@
# Oaknut
*A C++20 assembler for AArch64 (ARMv8.0)*
Oaknut is a header-only library that allows one to dynamically assemble code in-memory at runtime.
## Usage
Simple example:
```cpp
using EmittedFunction = int (*)();
EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value)
{
using namespace oaknut::util;
EmittedFunction result = code.ptr<EmittedFunction>();
code.MOVZ(W0, value);
code.RET();
return result;
}
```
## License
This project is [MIT licensed](LICENSE).

View file

@ -0,0 +1,124 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>
#if defined(_WIN32)
# include <windows.h>
#elif defined(__APPLE__)
# include <libkern/OSCacheControl.h>
# include <pthread.h>
# include <sys/mman.h>
# include <unistd.h>
#else
# include <sys/mman.h>
#endif
namespace oaknut {
/// Owns one anonymous memory mapping requested with read, write and execute
/// permission, and offers helpers for flushing freshly written instructions.
///
/// The mapping is created by the constructor and released by the destructor.
/// The type is neither copyable nor movable.
class CodeBlock {
public:
    /// Maps `size` bytes.
    /// @throws std::bad_alloc when the operating system refuses the mapping.
    explicit CodeBlock(std::size_t size)
        : m_size(size)
    {
#if defined(_WIN32)
        m_memory = static_cast<std::uint32_t*>(VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE));
#elif defined(__APPLE__)
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
        // mmap signals failure with MAP_FAILED, never with a null pointer.
        m_memory = (mapping == MAP_FAILED) ? nullptr : static_cast<std::uint32_t*>(mapping);
#else
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
        // mmap signals failure with MAP_FAILED, never with a null pointer.
        m_memory = (mapping == MAP_FAILED) ? nullptr : static_cast<std::uint32_t*>(mapping);
#endif

        if (m_memory == nullptr)
            throw std::bad_alloc{};
    }

    /// Releases the mapping, if one is held.
    ~CodeBlock()
    {
        if (m_memory == nullptr)
            return;

#if defined(_WIN32)
        VirtualFree((void*)m_memory, 0, MEM_RELEASE);
#else
        munmap(m_memory, m_size);
#endif
    }

    CodeBlock(const CodeBlock&) = delete;
    CodeBlock& operator=(const CodeBlock&) = delete;
    CodeBlock(CodeBlock&&) = delete;
    CodeBlock& operator=(CodeBlock&&) = delete;

    /// Start of the mapping, viewed as 32-bit words.
    std::uint32_t* ptr() const
    {
        return m_memory;
    }

    /// On Apple platforms calls pthread_jit_write_protect_np(1); a no-op elsewhere.
    void protect()
    {
#if defined(__APPLE__)
        pthread_jit_write_protect_np(1);
#endif
    }

    /// On Apple platforms calls pthread_jit_write_protect_np(0); a no-op elsewhere.
    void unprotect()
    {
#if defined(__APPLE__)
        pthread_jit_write_protect_np(0);
#endif
    }

    /// Makes `size` bytes starting at `mem` visible to instruction fetch.
    ///
    /// Apple platforms delegate to sys_icache_invalidate. Everywhere else the
    /// range is cleaned from the data cache and invalidated in the instruction
    /// cache one line at a time with AArch64 cache-maintenance instructions.
    void invalidate(std::uint32_t* mem, std::size_t size)
    {
#if defined(__APPLE__)
        sys_icache_invalidate(mem, size);
#else
        // The smallest line size observed so far is remembered across calls.
        // NOTE(review): presumably this guards against cores that report
        // different line sizes - confirm.
        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

        std::uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0"
                         : "=r"(ctr));

        // Line size in bytes is 4 << field; bits [3:0] are used for the
        // instruction cache and bits [19:16] for the data cache.
        const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf));
        const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf));

        const std::uintptr_t end = (std::uintptr_t)mem + size;

        // Walk the data cache from the line containing `mem`.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
            __asm__ volatile("dc cvau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }

        __asm__ volatile("dsb ish\n"
                         :
                         :
                         : "memory");

        // Walk the instruction cache over the same range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
            __asm__ volatile("ic ivau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }

        __asm__ volatile("dsb ish\nisb\n"
                         :
                         :
                         : "memory");
#endif
    }

    /// Applies invalidate() to the whole mapping.
    void invalidate_all()
    {
        invalidate(m_memory, m_size);
    }

protected:
    std::uint32_t* m_memory = nullptr;
    std::size_t m_size = 0;
};
} // namespace oaknut

View file

@ -0,0 +1,155 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
/// Software parallel bit deposit: scatters the low-order bits of `val` into
/// the positions of the set bits of `mask_`, lowest set bit first. Bits of
/// `val` beyond the number of set bits in `mask_` are ignored.
template<std::uint32_t mask_>
static constexpr std::uint32_t pdep(std::uint32_t val)
{
    std::uint32_t deposited = 0;
    std::uint32_t remaining = mask_;
    std::uint32_t source_bit = 1;
    while (remaining != 0) {
        // Isolate the lowest set bit still left in the mask.
        const std::uint32_t lowest = remaining & (~remaining + 1);
        if ((val & source_bit) != 0)
            deposited |= lowest;
        remaining ^= lowest;
        source_bit <<= 1;
    }
    return deposited;
}
// Generates an `encode<splat>(TYPE v)` overload for an operand type.
//   TYPE   - operand type accepted by the overload
//   ACCESS - expression (in terms of `v`) yielding the raw field value
//   SIZE   - number of set bits that `splat` must have (checked at compile time)
// The raw value is cast to std::uint32_t and deposited into the set bits of
// `splat` with pdep.
#define OAKNUT_STD_ENCODE(TYPE, ACCESS, SIZE) \
template<std::uint32_t splat> \
std::uint32_t encode(TYPE v) \
{ \
static_assert(std::popcount(splat) == SIZE); \
return pdep<splat>(static_cast<std::uint32_t>(ACCESS)); \
}
// Register operands: the index is masked to 5 bits.
OAKNUT_STD_ENCODE(RReg, v.index() & 31, 5)
OAKNUT_STD_ENCODE(VReg, v.index() & 31, 5)
OAKNUT_STD_ENCODE(VRegArranged, v.index() & 31, 5)
// Immediate wrappers: the pre-packed m_encoded member is used as-is.
OAKNUT_STD_ENCODE(AddSubImm, v.m_encoded, 13)
OAKNUT_STD_ENCODE(BitImm32, v.m_encoded, 12)
OAKNUT_STD_ENCODE(BitImm64, v.m_encoded, 13)
OAKNUT_STD_ENCODE(LslShift<32>, v.m_encoded, 12)
OAKNUT_STD_ENCODE(LslShift<64>, v.m_encoded, 12)
OAKNUT_STD_ENCODE(FImm8, v.m_encoded, 8)
OAKNUT_STD_ENCODE(RepImm, v.m_encoded, 8)
// Enumerations: the enumerator value itself is the field value.
OAKNUT_STD_ENCODE(Cond, v, 4)
OAKNUT_STD_ENCODE(AddSubExt, v, 3)
OAKNUT_STD_ENCODE(IndexExt, v, 3)
OAKNUT_STD_ENCODE(AddSubShift, v, 2)
OAKNUT_STD_ENCODE(LogShift, v, 2)
OAKNUT_STD_ENCODE(PstateField, v, 6)
OAKNUT_STD_ENCODE(SystemReg, v, 15)
OAKNUT_STD_ENCODE(AtOp, v, 7)
OAKNUT_STD_ENCODE(BarrierOp, v, 4)
OAKNUT_STD_ENCODE(DcOp, v, 10)
OAKNUT_STD_ENCODE(IcOp, v, 10)
OAKNUT_STD_ENCODE(PrfOp, v, 5)
OAKNUT_STD_ENCODE(TlbiOp, v, 10)
/// Encodes a MovImm16 into a field of 17 or 18 bits.
///
/// When the field is only 17 bits wide the packed value must fit in it;
/// otherwise the string literal "invalid MovImm16" is thrown.
template<std::uint32_t splat>
std::uint32_t encode(MovImm16 v)
{
    constexpr int field_bits = std::popcount(splat);
    static_assert(field_bits == 17 || field_bits == 18);

    if constexpr (field_bits == 17) {
        constexpr std::uint32_t representable = (1 << field_bits) - 1;
        const bool fits = (v.m_encoded & representable) == v.m_encoded;
        if (!fits)
            throw "invalid MovImm16";
    }

    return pdep<splat>(v.m_encoded);
}
// Plain immediate: the field must be at least as wide as the immediate.
template<std::uint32_t splat, std::size_t imm_size>
std::uint32_t encode(Imm<imm_size> v)
{
static_assert(std::popcount(splat) >= imm_size);
return pdep<splat>(v.value());
}
// Two-way choice: occupies exactly one bit.
template<std::uint32_t splat, int A, int B>
std::uint32_t encode(ImmChoice<A, B> v)
{
static_assert(std::popcount(splat) == 1);
return pdep<splat>(v.m_encoded);
}
// Four-way choice: occupies exactly two bits.
template<std::uint32_t splat, int A, int B, int C, int D>
std::uint32_t encode(ImmChoice<A, B, C, D> v)
{
static_assert(std::popcount(splat) == 2);
return pdep<splat>(v.m_encoded);
}
// SOffset: the field holds `size - align` bits.
template<std::uint32_t splat, std::size_t size, std::size_t align>
std::uint32_t encode(SOffset<size, align> v)
{
static_assert(std::popcount(splat) == size - align);
return pdep<splat>(v.m_encoded);
}
// POffset: the field holds `size - align` bits.
template<std::uint32_t splat, std::size_t size, std::size_t align>
std::uint32_t encode(POffset<size, align> v)
{
static_assert(std::popcount(splat) == size - align);
return pdep<splat>(v.m_encoded);
}
// Raw integer: deposited without any width check.
template<std::uint32_t splat>
std::uint32_t encode(std::uint32_t v)
{
return pdep<splat>(v);
}
// Register/element list: only the first entry (m_base) is encoded.
template<std::uint32_t splat, typename T, size_t N>
std::uint32_t encode(List<T, N> v)
{
return encode<splat>(v.m_base);
}
#undef OAKNUT_STD_ENCODE
// AddSubExt::LSL is a placeholder; these overloads replace it with the
// concrete extend that matches the register operand passed as a tag:
// UXTX for the X-register types and UXTW for the W-register types.
// Every other extend value is left untouched.
void addsubext_lsl_correction(AddSubExt& ext, XRegSp)
{
    switch (ext) {
    case AddSubExt::LSL:
        ext = AddSubExt::UXTX;
        break;
    default:
        break;
    }
}
void addsubext_lsl_correction(AddSubExt& ext, WRegWsp)
{
    switch (ext) {
    case AddSubExt::LSL:
        ext = AddSubExt::UXTW;
        break;
    default:
        break;
    }
}
void addsubext_lsl_correction(AddSubExt& ext, XReg)
{
    switch (ext) {
    case AddSubExt::LSL:
        ext = AddSubExt::UXTX;
        break;
    default:
        break;
    }
}
void addsubext_lsl_correction(AddSubExt& ext, WReg)
{
    switch (ext) {
    case AddSubExt::LSL:
        ext = AddSubExt::UXTW;
        break;
    default:
        break;
    }
}
// Checks that the extend kind agrees with the width of `rm`: extends whose
// low two bits are both set require a 64-bit register, every other extend
// requires a 32-bit register. Throws a string literal on a mismatch.
void addsubext_verify_reg_size(AddSubExt ext, RReg rm)
{
    const bool needs_64_bit = (static_cast<int>(ext) & 0b011) == 0b011;
    const bool width_matches = needs_64_bit ? rm.bitsize() == 64 : rm.bitsize() == 32;
    if (!width_matches)
        throw "invalid AddSubExt choice for rm size";
}
// Checks that the index extend agrees with the width of `rm`: an extend with
// bit 0 set requires a 64-bit register, one with bit 0 clear requires a
// 32-bit register. Throws a string literal on a mismatch.
void indexext_verify_reg_size(IndexExt ext, RReg rm)
{
    const bool needs_64_bit = (static_cast<int>(ext) & 1) == 1;
    const bool width_matches = needs_64_bit ? rm.bitsize() == 64 : rm.bitsize() == 32;
    if (!width_matches)
        throw "invalid IndexExt choice for rm size";
}
// Rejects bit numbers of 32 or above when `rt` is a 32-bit register.
void tbz_verify_reg_size(RReg rt, Imm<6> imm)
{
    if (rt.bitsize() != 32 || imm.value() < 32)
        return;
    throw "invalid imm choice for rt size";
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,242 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
namespace oaknut {
// Empty tag types naming the post-indexed and pre-indexed forms.
struct PostIndexed {};
struct PreIndexed {};
// Single-enumerator types standing for the "LSL" and "MSL" symbols.
enum class LslSymbol {
LSL,
};
enum class MslSymbol {
MSL,
};
/// Condition codes. The enumerator value is the 4-bit encoding; HS and LO are
/// alternative spellings of CS and CC.
enum class Cond {
    EQ = 0,
    NE = 1,
    CS = 2,
    CC = 3,
    MI = 4,
    PL = 5,
    VS = 6,
    VC = 7,
    HI = 8,
    LS = 9,
    GE = 10,
    LT = 11,
    GT = 12,
    LE = 13,
    AL = 14,
    NV = 15,
    HS = CS,
    LO = CC,
};

/// Returns the condition whose encoding differs from `c` only in bit 0,
/// i.e. the other member of the same even/odd pair.
constexpr Cond invert(Cond c)
{
    const auto encoding = static_cast<unsigned>(c);
    return static_cast<Cond>(encoding ^ 1u);
}
/// Register extend kinds for add/subtract operands.
enum class AddSubExt {
    UXTB = 0,
    UXTH = 1,
    UXTW = 2,
    UXTX = 3,
    SXTB = 4,
    SXTH = 5,
    SXTW = 6,
    SXTX = 7,
    // Placeholder outside the 3-bit range: stands for UXTW (32-bit) or UXTX (64-bit).
    LSL = 8,
};
// Extend kinds for an index register. Each enumerator carries its 3-bit
// encoding explicitly.
enum class IndexExt {
UXTW = 0b010,
LSL = 0b011,
SXTW = 0b110,
SXTX = 0b111,
};
/// Shift kinds accepted by add/subtract (shifted register) operands.
enum class AddSubShift {
    LSL = 0,
    LSR = 1,
    ASR = 2,
};
/// Shift kinds accepted by logical (shifted register) operands.
enum class LogShift {
    LSL = 0,
    LSR = 1,
    ASR = 2,
    ROR = 3,
};
// PSTATE field selectors. Each value is a 6-bit pattern written as two
// 3-bit groups; trailing comments name the architecture extension that
// introduces the field.
enum class PstateField {
UAO = 0b000'011, // ARMv8.2-UAO
PAN = 0b000'100, // ARMv8.1-PAN
SPSel = 0b000'101,
DIT = 0b011'010, // ARMv8.4-DIT
DAIFSet = 0b011'110,
DAIFClr = 0b011'111,
};
// System register selector. No enumerators are defined yet, so values can
// only be produced by casting from an integer.
enum class SystemReg {
};
// Address-translation operation selectors. Each value is a 7-bit pattern
// written as three groups (3'1'3 bits).
// NOTE(review): the groups look like op1'CRm<0>'op2 of the AT instruction - confirm.
enum class AtOp {
S1E1R = 0b000'0'000,
S1E1W = 0b000'0'001,
S1E0R = 0b000'0'010,
S1E0W = 0b000'0'011,
S1E1RP = 0b000'1'000, // ARMv8.2-ATS1E1
S1E1WP = 0b000'1'001, // ARMv8.2-ATS1E1
S1E2R = 0b100'0'000,
S1E2W = 0b100'0'001,
S12E1R = 0b100'0'100,
S12E1W = 0b100'0'101,
S12E0R = 0b100'0'110,
S12E0W = 0b100'0'111,
S1E3R = 0b110'0'000,
S1E3W = 0b110'0'001,
};
// Barrier option selectors, each carrying its 4-bit encoding.
enum class BarrierOp {
SY = 0b1111,
ST = 0b1110,
LD = 0b1101,
ISH = 0b1011,
ISHST = 0b1010,
ISHLD = 0b1001,
NSH = 0b0111,
NSHST = 0b0110,
NSHLD = 0b0101,
OSH = 0b0011,
OSHST = 0b0010,
OSHLD = 0b0001,
};
// Data-cache maintenance operation selectors. Each value is a 10-bit pattern
// written as three groups (3'4'3 bits).
// NOTE(review): the groups look like op1'CRm'op2 of the DC instruction - confirm.
enum class DcOp {
IVAC = 0b000'0110'001,
ISW = 0b000'0110'010,
CSW = 0b000'1010'010,
CISW = 0b000'1110'010,
ZVA = 0b011'0100'001,
CVAC = 0b011'1010'001,
CVAU = 0b011'1011'001,
CVAP = 0b011'1100'001, // ARMv8.2-DCPoP
CIVAC = 0b011'1110'001,
};
// Instruction-cache maintenance operation selectors. Each value is a 10-bit
// pattern written as three groups (3'4'3 bits).
enum class IcOp {
IALLUIS = 0b000'0001'000,
IALLU = 0b000'0101'000,
IVAU = 0b011'0101'001,
};
// Prefetch operation selectors. Each value is a 5-bit pattern written as
// three groups (2'2'1 bits). Going by the enumerator names, the groups select
// the kind (PLD/PLI/PST), the cache level (L1-L3) and KEEP versus STRM.
enum class PrfOp {
PLDL1KEEP = 0b00'00'0,
PLDL1STRM = 0b00'00'1,
PLDL2KEEP = 0b00'01'0,
PLDL2STRM = 0b00'01'1,
PLDL3KEEP = 0b00'10'0,
PLDL3STRM = 0b00'10'1,
PLIL1KEEP = 0b01'00'0,
PLIL1STRM = 0b01'00'1,
PLIL2KEEP = 0b01'01'0,
PLIL2STRM = 0b01'01'1,
PLIL3KEEP = 0b01'10'0,
PLIL3STRM = 0b01'10'1,
PSTL1KEEP = 0b10'00'0,
PSTL1STRM = 0b10'00'1,
PSTL2KEEP = 0b10'01'0,
PSTL2STRM = 0b10'01'1,
PSTL3KEEP = 0b10'10'0,
PSTL3STRM = 0b10'10'1,
};
// TLB-invalidate operation selectors. Each value is a 10-bit pattern written
// as three groups (3'4'3 bits). Entries tagged "ARMv8.4-TLBI" are marked by
// the original author as belonging to that extension.
// NOTE(review): the groups look like op1'CRm'op2 of the TLBI instruction - confirm.
enum class TlbiOp {
VMALLE1OS = 0b000'0001'000, // ARMv8.4-TLBI
VAE1OS = 0b000'0001'001, // ARMv8.4-TLBI
ASIDE1OS = 0b000'0001'010, // ARMv8.4-TLBI
VAAE1OS = 0b000'0001'011, // ARMv8.4-TLBI
VALE1OS = 0b000'0001'101, // ARMv8.4-TLBI
VAALE1OS = 0b000'0001'111, // ARMv8.4-TLBI
RVAE1IS = 0b000'0010'001, // ARMv8.4-TLBI
RVAAE1IS = 0b000'0010'011, // ARMv8.4-TLBI
RVALE1IS = 0b000'0010'101, // ARMv8.4-TLBI
RVAALE1IS = 0b000'0010'111, // ARMv8.4-TLBI
VMALLE1IS = 0b000'0011'000,
VAE1IS = 0b000'0011'001,
ASIDE1IS = 0b000'0011'010,
VAAE1IS = 0b000'0011'011,
VALE1IS = 0b000'0011'101,
VAALE1IS = 0b000'0011'111,
RVAE1OS = 0b000'0101'001, // ARMv8.4-TLBI
RVAAE1OS = 0b000'0101'011, // ARMv8.4-TLBI
RVALE1OS = 0b000'0101'101, // ARMv8.4-TLBI
RVAALE1OS = 0b000'0101'111, // ARMv8.4-TLBI
RVAE1 = 0b000'0110'001, // ARMv8.4-TLBI
RVAAE1 = 0b000'0110'011, // ARMv8.4-TLBI
RVALE1 = 0b000'0110'101, // ARMv8.4-TLBI
RVAALE1 = 0b000'0110'111, // ARMv8.4-TLBI
VMALLE1 = 0b000'0111'000,
VAE1 = 0b000'0111'001,
ASIDE1 = 0b000'0111'010,
VAAE1 = 0b000'0111'011,
VALE1 = 0b000'0111'101,
VAALE1 = 0b000'0111'111,
IPAS2E1IS = 0b100'0000'001,
RIPAS2E1IS = 0b100'0000'010, // ARMv8.4-TLBI
IPAS2LE1IS = 0b100'0000'101,
RIPAS2LE1IS = 0b100'0000'110, // ARMv8.4-TLBI
ALLE2OS = 0b100'0001'000, // ARMv8.4-TLBI
VAE2OS = 0b100'0001'001, // ARMv8.4-TLBI
ALLE1OS = 0b100'0001'100, // ARMv8.4-TLBI
VALE2OS = 0b100'0001'101, // ARMv8.4-TLBI
VMALLS12E1OS = 0b100'0001'110, // ARMv8.4-TLBI
RVAE2IS = 0b100'0010'001, // ARMv8.4-TLBI
RVALE2IS = 0b100'0010'101, // ARMv8.4-TLBI
ALLE2IS = 0b100'0011'000,
VAE2IS = 0b100'0011'001,
ALLE1IS = 0b100'0011'100,
VALE2IS = 0b100'0011'101,
VMALLS12E1IS = 0b100'0011'110,
IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI
IPAS2E1 = 0b100'0100'001,
RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI
RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI
IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI
IPAS2LE1 = 0b100'0100'101,
RIPAS2LE1 = 0b100'0100'110, // ARMv8.4-TLBI
RIPAS2LE1OS = 0b100'0100'111, // ARMv8.4-TLBI
RVAE2OS = 0b100'0101'001, // ARMv8.4-TLBI
RVALE2OS = 0b100'0101'101, // ARMv8.4-TLBI
RVAE2 = 0b100'0110'001, // ARMv8.4-TLBI
RVALE2 = 0b100'0110'101, // ARMv8.4-TLBI
ALLE2 = 0b100'0111'000,
VAE2 = 0b100'0111'001,
ALLE1 = 0b100'0111'100,
VALE2 = 0b100'0111'101,
VMALLS12E1 = 0b100'0111'110,
ALLE3OS = 0b110'0001'000, // ARMv8.4-TLBI
VAE3OS = 0b110'0001'001, // ARMv8.4-TLBI
VALE3OS = 0b110'0001'101, // ARMv8.4-TLBI
RVAE3IS = 0b110'0010'001, // ARMv8.4-TLBI
RVALE3IS = 0b110'0010'101, // ARMv8.4-TLBI
ALLE3IS = 0b110'0011'000,
VAE3IS = 0b110'0011'001,
VALE3IS = 0b110'0011'101,
RVAE3OS = 0b110'0101'001, // ARMv8.4-TLBI
RVALE3OS = 0b110'0101'101, // ARMv8.4-TLBI
RVAE3 = 0b110'0110'001, // ARMv8.4-TLBI
RVALE3 = 0b110'0110'101, // ARMv8.4-TLBI
ALLE3 = 0b110'0111'000,
VAE3 = 0b110'0111'001,
VALE3 = 0b110'0111'101,
};
} // namespace oaknut

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,317 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <bit>
#include <compare>
#include <cstddef>
#include <cstdint>
#include <optional>
namespace oaknut {
/// Unsigned immediate restricted to `bit_size_` bits (1 to 32 inclusive).
///
/// Converts implicitly from std::uint32_t and throws the string literal
/// "outsized Imm value" when the value has bits set above the field width.
template<std::size_t bit_size_>
struct Imm {
public:
    static_assert(bit_size_ != 0 && bit_size_ <= 32, "Invalid bit_size");
    static constexpr std::size_t bit_size = bit_size_;
    // Computed in 64 bits so that the permitted widths 31 and 32 do not
    // overflow the shift or the subtraction.
    static constexpr std::uint32_t mask = static_cast<std::uint32_t>((std::uint64_t{1} << bit_size) - 1);

    constexpr /* implicit */ Imm(std::uint32_t value_)
        : m_value(value_)
    {
        if (!is_valid(value_))
            throw "outsized Imm value";
    }

    constexpr auto operator<=>(const Imm& other) const { return m_value <=> other.m_value; }
    /// Compares against a raw integer by first converting it to Imm, so an
    /// `other` that does not fit the field throws rather than comparing.
    constexpr auto operator<=>(std::uint32_t other) const { return operator<=>(Imm{other}); }

    /// The stored value.
    constexpr std::uint32_t value() const { return m_value; }

    /// True when `value_` fits in `bit_size` bits. Declared constexpr so that
    /// the constructor can take part in constant evaluation.
    static constexpr bool is_valid(std::uint32_t value_)
    {
        return ((value_ & mask) == value_);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_value;
};
/// Optional left shift applied to a 12-bit add/subtract immediate.
enum class AddSubImmShift {
    SHL_0,
    SHL_12,
};

/// Add/subtract immediate: a 12-bit value with an optional shift by 12,
/// packed as the value in bits [11:0] and the shift flag in bit 12.
struct AddSubImm {
public:
    /// Builds from an explicit value/shift pair; throws when `value_` does
    /// not fit in 12 bits.
    constexpr AddSubImm(std::uint32_t value_, AddSubImmShift shift_)
        : m_encoded(shift_ == AddSubImmShift::SHL_12 ? (value_ | (1 << 12)) : value_)
    {
        if (value_ > 0xFFF)
            throw "invalid AddSubImm";
    }

    /// Builds from a plain integer, choosing the shift automatically; throws
    /// when the value is representable with neither shift.
    constexpr /* implicit */ AddSubImm(std::uint64_t value_)
    {
        const bool fits_unshifted = value_ <= 0xFFF;
        const bool fits_shifted = (value_ & ~std::uint64_t{0xFFF000}) == 0;
        if (fits_unshifted) {
            m_encoded = value_;
        } else if (fits_shifted) {
            m_encoded = (value_ >> 12) | (1 << 12);
        } else {
            throw "invalid AddSubImm";
        }
    }

    /// True when the single-argument constructor would accept `value_`.
    static constexpr bool is_valid(std::uint64_t value_)
    {
        if (value_ <= 0xFFF)
            return true;
        return (value_ & ~std::uint64_t{0xFFF000}) == 0;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
/// Position of the 16-bit chunk inside the 64-bit value.
enum class MovImm16Shift {
    SHL_0,
    SHL_16,
    SHL_32,
    SHL_48,
};

/// Move-wide immediate: a 16-bit chunk plus its position, packed as the
/// chunk in bits [15:0] and the chunk index starting at bit 16.
struct MovImm16 {
public:
    /// Builds from an explicit chunk/shift pair. constexpr like the
    /// constructors of the other immediate wrappers.
    constexpr MovImm16(std::uint16_t value_, MovImm16Shift shift_)
        : m_encoded(static_cast<std::uint32_t>(value_) | (static_cast<std::uint32_t>(shift_) << 16))
    {}

    /// Builds from a plain integer that has at most one non-zero 16-bit
    /// chunk; throws the string literal "invalid MovImm16" otherwise.
    /// Zero encodes as chunk 0 at position 0.
    constexpr /* implicit */ MovImm16(std::uint64_t value_)
    {
        std::uint32_t shift = 0;
        while (value_ != 0) {
            const std::uint32_t lsw = static_cast<std::uint16_t>(value_ & 0xFFFF);
            if (value_ == lsw) {
                // Only the current chunk is left: encode it with its index.
                m_encoded = lsw | (shift << 16);
                return;
            } else if (lsw != 0) {
                // A non-zero chunk with more non-zero bits above it.
                throw "invalid MovImm16";
            }
            value_ >>= 16;
            shift++;
        }
    }

    /// True when the single-argument constructor would accept `value_`.
    static constexpr bool is_valid(std::uint64_t value_)
    {
        return ((value_ & 0xFFFF) == value_) || ((value_ & 0xFFFF0000) == value_) || ((value_ & 0xFFFF00000000) == value_) || ((value_ & 0xFFFF000000000000) == value_);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded = 0;
};
namespace detail {
// Tries to express `value` as a bitmask immediate: a rotated run of ones
// repeated across the 64 bits with a power-of-two element size.
// Returns std::nullopt for all-zeros, all-ones, or a value that is not such a
// pattern. On success the result packs S in bits [5:0], R in bits [11:6] and
// N in bit 12.
// NOTE(review): S, R and N are presumably the imms, immr and N fields of the
// A64 logical-immediate encoding - confirm.
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint64_t value)
{
if (value == 0 || (~value) == 0)
return std::nullopt;
// Rotate so that the lowest run of ones starts at bit 0.
const std::size_t rotation = std::countr_zero(value & (value + 1));
const std::uint64_t rot_value = std::rotr(value, rotation);
// Element size candidate and length of the run of ones inside it.
const std::size_t esize = std::countr_zero(rot_value & (rot_value + 1));
const std::size_t ones = std::countr_one(rot_value);
// The pattern must repeat every `esize` bits.
if (std::rotr(value, esize) != value)
return std::nullopt;
const std::uint32_t S = ((-esize) << 1) | (ones - 1);
const std::uint32_t R = (esize - rotation) & (esize - 1);
const std::uint32_t N = (~S >> 6) & 1;
return static_cast<std::uint32_t>((S & 0b111111) | (R << 6) | (N << 12));
}
// 32-bit variant: duplicates `value` into both halves of a 64-bit word,
// reuses the 64-bit encoder and rejects any result with bit 12 set.
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint32_t value)
{
const std::uint64_t value_u64 = (static_cast<std::uint64_t>(value) << 32) | static_cast<std::uint64_t>(value);
const auto result = encode_bit_imm(value_u64);
if (result && (*result & 0b0'111111'111111) != *result)
return std::nullopt;
return result;
}
} // namespace detail
/// Bitmask immediate for 32-bit logical operations, stored as a 12-bit value
/// with immr in bits [11:6] and imms in bits [5:0].
struct BitImm32 {
public:
    /// Builds from explicit fields. They are packed in the same layout that
    /// detail::encode_bit_imm produces (imms low, immr shifted left by 6), so
    /// both constructors yield interchangeable encodings.
    constexpr BitImm32(Imm<6> imms, Imm<6> immr)
        : m_encoded((immr.value() << 6) | imms.value())
    {}

    /// Builds from the literal mask value; throws the string literal
    /// "invalid BitImm32" when it cannot be encoded.
    constexpr /* implicit */ BitImm32(std::uint32_t value)
    {
        const auto encoded = detail::encode_bit_imm(value);
        if (!encoded || (*encoded & 0x1000) != 0)
            throw "invalid BitImm32";
        m_encoded = *encoded;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
/// Bitmask immediate for 64-bit logical operations, stored as a 13-bit value
/// with N in bit 12, immr in bits [11:6] and imms in bits [5:0].
struct BitImm64 {
public:
    /// Builds from explicit fields. They are packed in the same layout that
    /// detail::encode_bit_imm produces (imms low, immr shifted left by 6,
    /// N at bit 12), so both constructors yield interchangeable encodings.
    constexpr BitImm64(bool N, Imm<6> imms, Imm<6> immr)
        : m_encoded((N ? 1 << 12 : 0) | (immr.value() << 6) | imms.value())
    {}

    /// Builds from the literal mask value; throws the string literal
    /// "invalid BitImm64" when it cannot be encoded.
    constexpr /* implicit */ BitImm64(std::uint64_t value)
    {
        const auto encoded = detail::encode_bit_imm(value);
        if (!encoded)
            throw "invalid BitImm64";
        m_encoded = *encoded;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
// 8-bit floating-point immediate held in already-encoded form.
struct FImm8 {
public:
// Takes the 8 encoded bits verbatim.
constexpr explicit FImm8(std::uint8_t encoded)
: m_encoded(encoded)
{}
// Assembles the encoding from its parts: sign in bit 7, exp in bits [6:4],
// mantissa in bits [3:0].
constexpr FImm8(bool sign, Imm<3> exp, Imm<4> mantissa)
: m_encoded((sign ? 1 << 7 : 0) | (exp.value() << 4) | (mantissa.value()))
{}
private:
template<typename Policy>
friend class BasicCodeGenerator;
std::uint32_t m_encoded;
};
// 8-bit immediate held in already-encoded form; the constructor stores the
// given bits verbatim.
struct RepImm {
public:
constexpr explicit RepImm(std::uint8_t encoded)
: m_encoded(encoded)
{}
private:
template<typename Policy>
friend class BasicCodeGenerator;
std::uint32_t m_encoded;
};
/// Operand that only accepts the single integer `A`. Any other value throws
/// the string literal "invalid ImmConst". Nothing is stored.
template<int A>
struct ImmConst {
    constexpr /* implicit */ ImmConst(int value)
    {
        const bool matches = (value == A);
        if (!matches)
            throw "invalid ImmConst";
    }
};
/// Operand that only accepts a floating-point value comparing equal to zero.
/// Anything else throws the string literal "invalid ImmConstFZero".
struct ImmConstFZero {
    constexpr /* implicit */ ImmConstFZero(double value)
    {
        if (value == 0)
            return;
        throw "invalid ImmConstFZero";
    }
};
/// Operand restricted to a fixed list of integers. The position of the
/// matching value within the template argument list is what gets stored.
template<int...>
struct ImmChoice;

/// Two alternatives: A is stored as 0 and B as 1.
template<int A, int B>
struct ImmChoice<A, B> {
    constexpr /* implicit */ ImmChoice(int value)
    {
        if (value != A && value != B)
            throw "invalid ImmChoice";
        m_encoded = (value == A) ? 0 : 1;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};

/// Four alternatives: A, B, C and D are stored as 0, 1, 2 and 3.
template<int A, int B, int C, int D>
struct ImmChoice<A, B, C, D> {
    constexpr /* implicit */ ImmChoice(int value)
    {
        const int options[4] = {A, B, C, D};
        // The first matching alternative wins.
        for (std::uint32_t position = 0; position < 4; ++position) {
            if (value == options[position]) {
                m_encoded = position;
                return;
            }
        }
        throw "invalid ImmChoice";
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
/// Unsigned operand restricted to the closed interval [Start, End]. Values
/// outside it throw the string literal "invalid ImmRange".
template<unsigned Start, unsigned End>
struct ImmRange {
    constexpr /* implicit */ ImmRange(unsigned value_)
        : m_value(value_)
    {
        const bool in_range = !(value_ < Start) && !(value_ > End);
        if (!in_range)
            throw "invalid ImmRange";
    }

    /// The stored value.
    constexpr unsigned value() const { return m_value; }

private:
    unsigned m_value;
};
/// Left-shift amount in the range [0, max_value), stored as a pair of 6-bit
/// fields: (-amount mod max_value) in bits [11:6] and (max_value - amount - 1)
/// in bits [5:0]. An amount of max_value or more throws the string literal
/// "LslShift out of range".
template<std::size_t max_value>
struct LslShift {
    constexpr /* implicit */ LslShift(std::size_t amount)
        : m_encoded(pack(amount))
    {
        if (!(amount < max_value))
            throw "LslShift out of range";
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    // Unsigned arithmetic wraps, so this is well defined even for an amount
    // that the constructor goes on to reject.
    static constexpr std::uint32_t pack(std::size_t amount)
    {
        const std::size_t upper = (0 - amount) & (max_value - 1);
        const std::size_t lower = max_value - amount - 1;
        return static_cast<std::uint32_t>((upper << 6) | lower);
    }

    std::uint32_t m_encoded;
};
} // namespace oaknut

View file

@ -0,0 +1,80 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <tuple>
#include <type_traits>
namespace oaknut {
struct Elem;
template<typename>
struct ElemSelector;
struct VRegArranged;
namespace detail {
// Type trait: true exactly when the argument is a specialisation of
// ElemSelector.
template<typename>
struct is_instance_of_ElemSelector : std::false_type {};
template<typename E>
struct is_instance_of_ElemSelector<ElemSelector<E>> : std::true_type {};
// Convenience variable template for the trait above.
template<class T>
constexpr bool is_instance_of_ElemSelector_v = is_instance_of_ElemSelector<T>::value;
// Tag selecting List's private constructor, which stores a base element
// without running the consecutive-register check.
struct BaseOnlyTag {};
} // namespace detail
// A list of N operands of type T whose register indexes are consecutive
// modulo 32. Only the first operand is stored.
template<typename T, std::size_t N>
struct List {
// Builds from exactly N arguments, all of type T. T must derive from
// VRegArranged or Elem, or be an ElemSelector specialisation. Throws the
// string literal "invalid List" when the arguments are not consecutive.
template<typename... U>
constexpr explicit List(U... args)
: m_base(std::get<0>(std::tie(args...)))
{
static_assert((std::is_same_v<T, U> && ...));
static_assert(sizeof...(args) == N);
static_assert(std::is_base_of_v<VRegArranged, T> || std::is_base_of_v<Elem, T> || detail::is_instance_of_ElemSelector_v<T>);
if (!verify(std::index_sequence_for<U...>{}, args...))
throw "invalid List";
}
// Indexes the first operand and returns a list of the resulting type with
// the same length N.
constexpr auto operator[](unsigned elem_index) const
{
using S = decltype(m_base[elem_index]);
return List<S, N>(detail::BaseOnlyTag{}, m_base[elem_index]);
}
private:
template<typename>
friend class BasicCodeGenerator;
// Other List specialisations need the private constructor below.
template<typename, std::size_t>
friend struct List;
// Stores `base_` directly, skipping verification.
constexpr explicit List(detail::BaseOnlyTag, T base_)
: m_base(base_)
{}
// Checks that argument i has register index (base index + i) mod 32; for
// Elem-derived types the element index must also equal that of the base.
template<typename... U, std::size_t... indexes>
constexpr bool verify(std::index_sequence<indexes...>, U... args)
{
if constexpr (std::is_base_of_v<VRegArranged, T>) {
return (((m_base.index() + indexes) % 32 == static_cast<std::size_t>(args.index())) && ...);
} else if constexpr (std::is_base_of_v<Elem, T>) {
return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index()) && m_base.elem_index() == args.elem_index()) && ...);
} else {
return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index())) && ...);
}
}
T m_base;
};
// Deduction guide: List{a, b, c} deduces the common type of the arguments
// and their count.
template<typename... U>
List(U...) -> List<std::common_type_t<U...>, sizeof...(U)>;
} // namespace oaknut

View file

@ -0,0 +1,19 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
namespace oaknut {
/// A single name that converts implicitly to each of several constants.
///
/// MultiTypedName<A, B, ...> provides one conversion operator per listed
/// value, each returning that value as its own type. This lets one identifier
/// stand for enumerators of several different enums.
template<auto... Values>
struct MultiTypedName;

/// Recursion terminator: no values, no conversions.
template<>
struct MultiTypedName<> {};

/// Peels off the first value, adds its conversion operator and inherits the rest.
template<auto Head, auto... Tail>
struct MultiTypedName<Head, Tail...> : MultiTypedName<Tail...> {
    constexpr operator decltype(Head)() const
    {
        return Head;
    }
};
} // namespace oaknut

View file

@ -0,0 +1,129 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <cstdint>
#include <variant>
namespace oaknut {
struct Label;
namespace detail {

/// Returns a 64-bit value whose low `size` bits are clear and whose remaining
/// bits are set. A `size` of 64 or more yields 0; shifting a 64-bit value by 64
/// or more would be undefined, so that case is handled explicitly.
constexpr std::uint64_t inverse_mask_from_size(std::size_t size)
{
    if (size >= 64) {
        return 0;
    }
    return (~std::uint64_t{0}) << size;
}

/// Returns a 64-bit value whose low `size` bits are set and whose remaining
/// bits are clear. `size == 0` yields 0 and `size >= 64` yields all ones; both
/// would otherwise require an out-of-range (undefined) shift.
constexpr std::uint64_t mask_from_size(std::size_t size)
{
    if (size == 0) {
        return 0;
    }
    if (size >= 64) {
        return ~std::uint64_t{0};
    }
    return (~std::uint64_t{0}) >> (64 - size);
}

/// Sign-extends the low `bit_count` bits of `value` to 64 bits.
/// `bit_count` must be in [1, 64]; this is enforced at compile time.
template<std::size_t bit_count>
constexpr std::uint64_t sign_extend(std::uint64_t value)
{
    static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
    static_assert(bit_count <= 64, "cannot sign-extend a value wider than 64 bits");
    constexpr std::size_t shift_amount = 64 - bit_count;
    // Move the sign bit to bit 63, then shift back arithmetically to replicate it.
    return static_cast<std::uint64_t>(static_cast<std::int64_t>(value << shift_amount) >> shift_amount);
}

}  // namespace detail
/// A signed, aligned address offset of `bitsize` bits, or a target still to be
/// turned into one (a Label or a raw pointer).
///
/// The payload is one of: an already-encoded field, a Label*, or a void*.
template<std::size_t bitsize, std::size_t alignment>
struct AddrOffset {
    /// From a byte difference; encoded immediately (may throw, see encode()).
    AddrOffset(std::ptrdiff_t diff)
        : m_payload(encode(diff))
    {}

    /// From a label; the label's address is stored, nothing is encoded yet.
    AddrOffset(Label& label)
        : m_payload(&label)
    {}

    /// From an absolute pointer; nothing is encoded yet.
    AddrOffset(void* ptr)
        : m_payload(ptr)
    {}

    /// Encodes `diff` as a `bitsize`-bit signed field with the low `alignment`
    /// bits dropped.
    /// Throws "out of range" if `diff` does not survive sign-extension from
    /// `bitsize` bits, and "misalignment" if any of its low `alignment` bits
    /// are set. The range check runs first.
    static std::uint32_t encode(std::ptrdiff_t diff)
    {
        const auto raw = static_cast<std::uint64_t>(diff);

        const bool fits = detail::sign_extend<bitsize>(raw) == raw;
        if (!fits) {
            throw "out of range";
        }

        const bool aligned = (raw & detail::inverse_mask_from_size(alignment)) == raw;
        if (!aligned) {
            throw "misalignment";
        }

        const std::uint64_t field = raw & detail::mask_from_size(bitsize);
        return static_cast<std::uint32_t>(field >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::variant<std::uint32_t, Label*, void*> m_payload;
};
/// A page-granular offset of `bitsize` bits to a target given as a pointer or
/// a Label. Pages are 4 KiB: addresses are shifted right by 12 before use.
template<std::size_t bitsize>
struct PageOffset {
    /// Target given as an absolute pointer.
    PageOffset(void* ptr)
        : m_payload(ptr)
    {}

    /// Target given as a label; the label's address is stored.
    PageOffset(Label& label)
        : m_payload(&label)
    {}

    /// Returns the signed difference between the page numbers of `target` and
    /// `current_addr`, truncated to `bitsize` bits.
    /// Throws "out of range" if that difference does not fit in `bitsize`
    /// signed bits.
    static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target)
    {
        const std::int64_t target_page = static_cast<std::int64_t>(target) >> 12;
        const std::int64_t current_page = static_cast<std::int64_t>(current_addr) >> 12;
        const std::int64_t page_diff = target_page - current_page;

        // All further work is done on the two's-complement bit pattern.
        const auto diff_bits = static_cast<std::uint64_t>(page_diff);
        if (detail::sign_extend<bitsize>(diff_bits) != diff_bits) {
            throw "out of range";
        }
        return static_cast<std::uint32_t>(diff_bits & detail::mask_from_size(bitsize));
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::variant<Label*, void*> m_payload;
};
/// A signed immediate offset of `bitsize` bits whose low `alignment` bits must
/// be zero; stored with those low bits shifted out.
template<std::size_t bitsize, std::size_t alignment>
struct SOffset {
    /// Throws "out of range" if `offset` does not fit in `bitsize` signed bits,
    /// then "misalignment" if any of its low `alignment` bits are set.
    SOffset(std::int64_t offset)
    {
        const auto raw = static_cast<std::uint64_t>(offset);

        if (detail::sign_extend<bitsize>(raw) != raw) {
            throw "out of range";
        }
        if ((raw & detail::inverse_mask_from_size(alignment)) != raw) {
            throw "misalignment";
        }

        const std::uint64_t field = raw & detail::mask_from_size(bitsize);
        m_encoded = static_cast<std::uint32_t>(field >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
/// A non-negative immediate offset of at most `bitsize` bits whose low
/// `alignment` bits must be zero; stored with those low bits shifted out.
template<std::size_t bitsize, std::size_t alignment>
struct POffset {
    /// Throws "out of range" if `offset`, reinterpreted as unsigned, exceeds the
    /// largest `bitsize`-bit value (this also rejects every negative offset),
    /// then "misalignment" if any of its low `alignment` bits are set.
    POffset(std::int64_t offset)
    {
        const auto raw = static_cast<std::uint64_t>(offset);
        const std::uint64_t field_mask = detail::mask_from_size(bitsize);

        if (raw > field_mask) {
            throw "out of range";
        }
        if ((raw & detail::inverse_mask_from_size(alignment)) != raw) {
            throw "misalignment";
        }

        m_encoded = static_cast<std::uint32_t>((raw & detail::mask_from_size(bitsize)) >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
} // namespace oaknut

View file

@ -0,0 +1,441 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace oaknut {
// Forward declarations of every register and element type defined in this header.

// Base and general-purpose registers.
struct Reg;
struct RReg;
struct ZrReg;
struct WzrReg;
struct XReg;
struct WReg;
struct SpReg;
struct WspReg;
struct XRegSp;
// Was declared as "XRegWsp", a name that is never defined; the type this
// header actually defines is WRegWsp.
struct WRegWsp;

// Scalar views of the vector registers.
struct VReg;
struct VRegArranged;
struct BReg;
struct HReg;
struct SReg;
struct DReg;
struct QReg;

// Vector registers with an explicit lane arrangement.
struct VReg_8B;
struct VReg_4H;
struct VReg_2S;
struct VReg_1D;
struct VReg_16B;
struct VReg_8H;
struct VReg_4S;
struct VReg_2D;
struct VReg_1Q;

// Selectors and single-lane element references.
struct VRegSelector;
template<typename Elem>
struct ElemSelector;
struct BElem;
struct HElem;
struct SElem;
struct DElem;
struct DElem_1;
/// Common base of all register operands: an index, a width in bits and a flag
/// telling general-purpose and vector registers apart.
///
/// The index is in [-1, 31]; the derived types in this header use -1 for the
/// stack pointer. The width must be a non-zero power of two. Both are checked
/// with assert only.
struct Reg {
    constexpr explicit Reg(bool is_vector_, unsigned bitsize_, int index_)
        : m_index(index_)
        , m_bitsize(bitsize_)
        , m_is_vector(is_vector_)
    {
        assert(index_ >= -1 && index_ <= 31);
        assert(bitsize_ != 0 && (bitsize_ & (bitsize_ - 1)) == 0 && "Bitsize must be a power of two");
    }

    /// Register number, or -1.
    constexpr int index() const
    {
        return m_index;
    }

    /// Width of the register view in bits.
    constexpr unsigned bitsize() const
    {
        return m_bitsize;
    }

    /// True for vector registers, false for general-purpose ones.
    constexpr bool is_vector() const
    {
        return m_is_vector;
    }

private:
    // Packed into 8-bit fields; the signed field holds -1..31, the unsigned one
    // holds widths up to 128.
    int m_index : 8;
    unsigned m_bitsize : 8;
    bool m_is_vector;
};
/// A general-purpose register operand; the width must be 32 or 64 bits
/// (checked with assert).
struct RReg : public Reg {
    constexpr explicit RReg(unsigned bitsize_, int index_)
        : Reg{false, bitsize_, index_}
    {
        assert(bitsize_ == 32 || bitsize_ == 64);
    }

    /// Same register number viewed as a 64-bit register; throws for index -1.
    XReg toX() const;

    /// Same register number viewed as a 32-bit register; throws for index -1.
    WReg toW() const;

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// The 64-bit zero register: width 64, index 31.
struct ZrReg : public RReg {
    constexpr explicit ZrReg()
        : RReg{64, 31}
    {}
};

/// The 32-bit zero register: width 32, index 31.
struct WzrReg : public RReg {
    constexpr explicit WzrReg()
        : RReg{32, 31}
    {}
};
struct XReg : public RReg {
constexpr explicit XReg(int index_)
: RReg(64, index_) {}
constexpr /* implicit */ XReg(ZrReg)
: RReg(64, 31) {}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct WReg : public RReg {
constexpr explicit WReg(int index_)
: RReg(32, index_) {}
constexpr /* implicit */ WReg(WzrReg)
: RReg(32, 31) {}
template<typename Policy>
friend class BasicCodeGenerator;
};
/// Returns the 64-bit register with the same index.
/// Throws "cannot convert SP/WSP to XReg" when the index is -1.
inline XReg RReg::toX() const
{
    const int idx = index();
    if (idx == -1) {
        throw "cannot convert SP/WSP to XReg";
    }
    return XReg(idx);
}

/// Returns the 32-bit register with the same index.
/// Throws "cannot convert SP/WSP to WReg" when the index is -1.
inline WReg RReg::toW() const
{
    const int idx = index();
    if (idx == -1) {
        throw "cannot convert SP/WSP to WReg";
    }
    return WReg(idx);
}
/// The 64-bit stack pointer, represented as width 64 with index -1.
struct SpReg : public RReg {
    constexpr explicit SpReg()
        : RReg{64, -1}
    {}
};
/// The 32-bit view of the stack pointer, represented as width 32 with index -1.
///
/// This was previously constructed with a width of 64, which made WSP
/// indistinguishable from SP through bitsize() and disagreed with
/// WRegWsp(WspReg), which builds a 32-bit register.
struct WspReg : public RReg {
    constexpr explicit WspReg()
        : RReg(32, -1) {}
};
struct XRegSp : public RReg {
constexpr /* implict */ XRegSp(SpReg)
: RReg(64, -1) {}
constexpr /* implict */ XRegSp(XReg xr)
: RReg(64, xr.index())
{
if (xr.index() == 31)
throw "unexpected ZR passed into an XRegSp";
}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct WRegWsp : public RReg {
constexpr /* implict */ WRegWsp(WspReg)
: RReg(32, -1) {}
constexpr /* implict */ WRegWsp(WReg wr)
: RReg(32, wr.index())
{
if (wr.index() == 31)
throw "unexpected WZR passed into an WRegWsp";
}
template<typename Policy>
friend class BasicCodeGenerator;
};
/// A scalar view of a vector register. The width must be 8, 16, 32, 64 or 128
/// bits (checked with assert).
struct VReg : public Reg {
    constexpr explicit VReg(unsigned bitsize_, int index_)
        : Reg{true, bitsize_, index_}
    {
        assert(bitsize_ == 8 || bitsize_ == 16 || bitsize_ == 32 || bitsize_ == 64 || bitsize_ == 128);
    }

    // Each conversion returns the view of the given width with the same index.
    constexpr BReg toB() const;
    constexpr HReg toH() const;
    constexpr SReg toS() const;
    constexpr DReg toD() const;
    constexpr QReg toQ() const;

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// A vector register together with a lane arrangement.
///
/// `bitsize_` is the total width used (64 or 128) and `esize_` the width of one
/// lane in bits: a non-zero power of two no larger than `bitsize_`. All three
/// conditions are checked with assert only.
struct VRegArranged : public Reg {
    constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_)
        : Reg(true, bitsize_, index_), m_esize(esize_)
    {
        assert(bitsize_ == 64 || bitsize_ == 128);
        assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two");
        assert(esize_ <= bitsize_);
    }

    template<typename Policy>
    friend class BasicCodeGenerator;

private:
    // Must be unsigned: a signed 8-bit field tops out at 127 and cannot hold
    // the lane width of 128 that VReg_1Q passes in.
    unsigned m_esize : 8;
};
/// 8-bit scalar view of a vector register.
struct BReg : public VReg {
    constexpr explicit BReg(int index_)
        : VReg{8, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 16-bit scalar view of a vector register.
struct HReg : public VReg {
    constexpr explicit HReg(int index_)
        : VReg{16, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 32-bit scalar view of a vector register.
struct SReg : public VReg {
    constexpr explicit SReg(int index_)
        : VReg{32, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 64-bit scalar view of a vector register.
struct DReg : public VReg {
    constexpr explicit DReg(int index_)
        : VReg{64, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 128-bit scalar view of a vector register.
struct QReg : public VReg {
    constexpr explicit QReg(int index_)
        : VReg{128, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
// Arranged vector registers. Each type fixes the total width (64 or 128 bits)
// and the lane width, which is the total width divided by the lane count in
// the type's name.

/// 64 bits as 8 lanes of 8 bits.
struct VReg_8B : public VRegArranged {
    constexpr explicit VReg_8B(int reg_index_)
        : VRegArranged{64, reg_index_, 8}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 64 bits as 4 lanes of 16 bits.
struct VReg_4H : public VRegArranged {
    constexpr explicit VReg_4H(int reg_index_)
        : VRegArranged{64, reg_index_, 16}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 64 bits as 2 lanes of 32 bits.
struct VReg_2S : public VRegArranged {
    constexpr explicit VReg_2S(int reg_index_)
        : VRegArranged{64, reg_index_, 32}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 64 bits as 1 lane of 64 bits.
struct VReg_1D : public VRegArranged {
    constexpr explicit VReg_1D(int reg_index_)
        : VRegArranged{64, reg_index_, 64}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 128 bits as 16 lanes of 8 bits.
struct VReg_16B : public VRegArranged {
    constexpr explicit VReg_16B(int reg_index_)
        : VRegArranged{128, reg_index_, 8}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 128 bits as 8 lanes of 16 bits.
struct VReg_8H : public VRegArranged {
    constexpr explicit VReg_8H(int reg_index_)
        : VRegArranged{128, reg_index_, 16}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 128 bits as 4 lanes of 32 bits.
struct VReg_4S : public VRegArranged {
    constexpr explicit VReg_4S(int reg_index_)
        : VRegArranged{128, reg_index_, 32}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 128 bits as 2 lanes of 64 bits.
struct VReg_2D : public VRegArranged {
    constexpr explicit VReg_2D(int reg_index_)
        : VRegArranged{128, reg_index_, 64}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};

/// 128 bits as 1 lane of 128 bits.
struct VReg_1Q : public VRegArranged {
    constexpr explicit VReg_1Q(int reg_index_)
        : VRegArranged{128, reg_index_, 128}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// One lane of a vector register: a lane size, a register number and a lane index.
///
/// The lane index must be below 128 / esize_; otherwise the constructor throws
/// the string literal "invalid elem_index".
struct Elem {
    constexpr explicit Elem(unsigned esize_, int reg_, unsigned elem_index_)
        : m_esize(esize_), m_reg(reg_), m_elem_index(elem_index_)
    {
        const unsigned lane_limit = 128 / esize_;
        if (!(elem_index_ < lane_limit)) {
            throw "invalid elem_index";
        }
    }

    /// Lane size exactly as passed to the constructor.
    constexpr unsigned esize() const
    {
        return m_esize;
    }

    /// Number of the vector register holding the lane.
    constexpr int reg_index() const
    {
        return m_reg;
    }

    /// Index of the lane within the register.
    constexpr unsigned elem_index() const
    {
        return m_elem_index;
    }

private:
    unsigned m_esize;
    int m_reg;
    unsigned m_elem_index;
};
/// Remembers a vector register number and produces lanes of type E from it.
///
/// `selector[i]` constructs `E{reg_index, i}`; any validation is done by E.
template<typename E>
struct ElemSelector {
    constexpr explicit ElemSelector(int reg_index_)
        : m_reg_index(reg_index_)
    {}

    /// Number of the selected vector register.
    constexpr int reg_index() const
    {
        return m_reg_index;
    }

    /// Lane `elem_index` of the selected register.
    constexpr E operator[](unsigned elem_index) const
    {
        return E{m_reg_index, elem_index};
    }

private:
    int m_reg_index;
};
// Lane types. The first argument passed to Elem is the lane width in bits:
// Elem rejects indexes >= 128 / esize, so only a width in bits yields the
// correct lane counts for a 128-bit register (16, 8, 4 and 2).
// These previously passed 2, 2, 4 and 8, which gave BElem and HElem the same
// size and let out-of-range lane indexes through unchecked.

/// An 8-bit lane; valid indexes are 0..15.
struct BElem : public Elem {
    constexpr explicit BElem(int reg_, unsigned elem_index_)
        : Elem(8, reg_, elem_index_)
    {}
};

/// A 16-bit lane; valid indexes are 0..7.
struct HElem : public Elem {
    constexpr explicit HElem(int reg_, unsigned elem_index_)
        : Elem(16, reg_, elem_index_)
    {}
};

/// A 32-bit lane; valid indexes are 0..3.
struct SElem : public Elem {
    constexpr explicit SElem(int reg_, unsigned elem_index_)
        : Elem(32, reg_, elem_index_)
    {}
};

/// A 64-bit lane; valid indexes are 0..1.
struct DElem : public Elem {
    constexpr explicit DElem(int reg_, unsigned elem_index_)
        : Elem(64, reg_, elem_index_)
    {}
};
/// A DElem that is required to refer to lane 1.
/// Converts implicitly from DElem and throws "invalid DElem_1" for any other lane.
struct DElem_1 : public DElem {
    constexpr /* implicit */ DElem_1(DElem inner)
        : DElem(inner)
    {
        // The base was copied from `inner`, so it reports the same lane index.
        if (elem_index() != 1) {
            throw "invalid DElem_1";
        }
    }
};
// Width conversions: each returns the scalar view of the requested width that
// has the same register index as this one.

constexpr BReg VReg::toB() const
{
    return BReg(index());
}

constexpr HReg VReg::toH() const
{
    return HReg(index());
}

constexpr SReg VReg::toS() const
{
    return SReg(index());
}

constexpr DReg VReg::toD() const
{
    return DReg(index());
}

constexpr QReg VReg::toQ() const
{
    return QReg(index());
}
/// A vector register number from which arranged registers and lane selectors
/// are derived, e.g. `v.B16()` or `v.S()[2]`.
struct VRegSelector {
    constexpr explicit VRegSelector(int reg_index)
        : m_reg_index(reg_index)
    {}

    /// Number of the vector register.
    constexpr int index() const
    {
        return m_reg_index;
    }

    // Lane selectors: index the result to obtain a single lane.
    constexpr ElemSelector<BElem> B() const { return ElemSelector<BElem>{m_reg_index}; }
    constexpr ElemSelector<HElem> H() const { return ElemSelector<HElem>{m_reg_index}; }
    constexpr ElemSelector<SElem> S() const { return ElemSelector<SElem>{m_reg_index}; }
    constexpr ElemSelector<DElem> D() const { return ElemSelector<DElem>{m_reg_index}; }

    // 64-bit arrangements.
    constexpr VReg_8B B8() const { return VReg_8B(m_reg_index); }
    constexpr VReg_4H H4() const { return VReg_4H(m_reg_index); }
    constexpr VReg_2S S2() const { return VReg_2S(m_reg_index); }
    constexpr VReg_1D D1() const { return VReg_1D(m_reg_index); }

    // 128-bit arrangements.
    constexpr VReg_16B B16() const { return VReg_16B(m_reg_index); }
    constexpr VReg_8H H8() const { return VReg_8H(m_reg_index); }
    constexpr VReg_4S S4() const { return VReg_4S(m_reg_index); }
    constexpr VReg_2D D2() const { return VReg_2D(m_reg_index); }
    constexpr VReg_1Q Q1() const { return VReg_1Q(m_reg_index); }

private:
    int m_reg_index;
};
} // namespace oaknut

View file

@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm>
#include <cstddef>
namespace oaknut {
/// A fixed-size copy of a string literal, usable as a template argument.
///
/// N counts the terminating NUL, so `size == N` and `strlen == N - 1`.
template<std::size_t N>
struct StringLiteral {
    /// Copies all N characters of `str`, including the terminator.
    constexpr StringLiteral(const char (&str)[N])
        : value{}
    {
        for (std::size_t i = 0; i < N; ++i) {
            value[i] = str[i];
        }
    }

    static constexpr std::size_t strlen = N - 1;
    static constexpr std::size_t size = N;

    char value[N];
};
} // namespace oaknut

View file

@ -0,0 +1,306 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <bit>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <tuple>
#include <type_traits>
#include <variant>
#include <vector>
#include "oaknut/impl/enum.hpp"
#include "oaknut/impl/imm.hpp"
#include "oaknut/impl/list.hpp"
#include "oaknut/impl/multi_typed_name.hpp"
#include "oaknut/impl/offset.hpp"
#include "oaknut/impl/reg.hpp"
#include "oaknut/impl/string_literal.hpp"
namespace oaknut {
namespace detail {
/// Builds a 32-bit mask from an instruction bit pattern.
///
/// `bs` is read as 32 characters, most significant bit first. Bit (31 - i) of
/// the result is set when character i of `bs` equals any character of `barg`.
/// For example, with barg == "1" the result has a bit set for every '1' in bs.
template<StringLiteral bs, StringLiteral barg>
constexpr std::uint32_t get_bits()
{
    // The loop reads bs.value[0..31]; reject shorter patterns at compile time
    // instead of reading past the end of the array.
    static_assert(bs.size >= 32, "bit pattern must provide at least 32 characters");

    std::uint32_t result = 0;
    for (std::size_t i = 0; i < 32; i++) {
        for (std::size_t a = 0; a < barg.strlen; a++) {
            if (bs.value[i] == barg.value[a]) {
                // Shift an unsigned one: `1 << 31` would shift a signed int
                // into its sign bit.
                result |= std::uint32_t{1} << (31 - i);
            }
        }
    }
    return result;
}
/// Combines several callables into one object whose call operator is the
/// overload set of all of them; used as the visitor for std::visit.
template<class... Callables>
struct overloaded : Callables... {
    using Callables::operator()...;
};

/// Deduces the callable types from the constructor arguments.
template<class... Callables>
overloaded(Callables...) -> overloaded<Callables...>;
} // namespace detail
/// A branch target that may be referenced before its address is known.
///
/// A default-constructed label has no address. Until one is set, the code
/// generator records a Writeback for every instruction that refers to it.
/// All state is private and reachable only from BasicCodeGenerator.
struct Label {
public:
    Label() = default;

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    /// Creates a label that already has an address.
    explicit Label(std::uintptr_t addr)
        : m_addr{addr}
    {}

    /// Computes the bits to merge into the instruction at `wb_addr` once the
    /// label's address `resolved_addr` is known.
    using EmitFunctionType = std::uint32_t (*)(std::uintptr_t wb_addr, std::uintptr_t resolved_addr);

    /// One pending fix-up: where to patch, which bits to keep, how to compute the rest.
    struct Writeback {
        std::uintptr_t m_wb_addr;
        std::uint32_t m_mask;
        EmitFunctionType m_fn;
    };

    std::optional<std::uintptr_t> m_addr;
    std::vector<Writeback> m_wbs;
};
/// Emits AArch64 instructions through the storage interface provided by Policy.
///
/// Policy must supply `constructor_argument_type`, `append(word)`,
/// `current_address()` and `set_at_address(addr, value, mask)`.
/// The instruction mnemonics themselves come from the two `.inc.hpp` files
/// included in the class body.
template<typename Policy>
class BasicCodeGenerator : public Policy {
public:
    BasicCodeGenerator(typename Policy::constructor_argument_type arg)
        : Policy(arg)
    {}

    /// Returns a label already bound to the current emission address.
    Label l()
    {
        return Label{Policy::current_address()};
    }

    /// Binds `label` to the current emission address and patches every
    /// instruction that referred to it while it was unbound.
    /// Throws "label already resolved" if the label already has an address.
    void l(Label& label)
    {
        if (label.m_addr)
            throw "label already resolved";

        const auto target_addr = Policy::current_address();
        label.m_addr = target_addr;
        for (auto& wb : label.m_wbs) {
            const std::uint32_t value = wb.m_fn(wb.m_wb_addr, target_addr);
            Policy::set_at_address(wb.m_wb_addr, value, wb.m_mask);
        }
        label.m_wbs.clear();
    }

#include "oaknut/impl/arm64_mnemonics.inc.hpp"
#include "oaknut/impl/fpsimd_mnemonics.inc.hpp"

    /// RET with no operand returns through X30.
    void RET()
    {
        return RET(XReg{30});
    }

    /// Loads a 32-bit constant into `wd`, preferring a single instruction:
    /// MOVZ, then MOVN, then ORR with a bitmask immediate, and finally a
    /// MOVZ + MOVK pair. Does nothing when `wd` has index 31.
    void MOV(WReg wd, std::uint32_t imm)
    {
        if (wd.index() == 31)
            return;
        if (MovImm16::is_valid(imm))
            return MOVZ(wd, imm);
        // MOVN writes the inverse of its operand, so both the validity test and
        // the operand must use the 32-bit inverse of `imm`. The previous code
        // tested the 64-bit inverse (upper half all ones, never a valid 16-bit
        // chunk) and passed `imm` itself.
        if (const std::uint32_t inverted = static_cast<std::uint32_t>(~imm); MovImm16::is_valid(inverted))
            return MOVN(wd, inverted);
        if (detail::encode_bit_imm(imm))
            return ORR(wd, WzrReg{}, imm);

        MOVZ(wd, {static_cast<std::uint16_t>(imm >> 0), MovImm16Shift::SHL_0});
        MOVK(wd, {static_cast<std::uint16_t>(imm >> 16), MovImm16Shift::SHL_16});
    }

    /// Loads a 64-bit constant into `xd` with as few instructions as the
    /// strategies below find. Does nothing when `xd` has index 31.
    void MOV(XReg xd, std::uint64_t imm)
    {
        if (xd.index() == 31)
            return;
        // Values that fit in 32 bits are loaded through the W view of the register.
        if (imm >> 32 == 0)
            return MOV(xd.toW(), static_cast<std::uint32_t>(imm));
        if (MovImm16::is_valid(imm))
            return MOVZ(xd, imm);
        // MOVN writes the inverse of its operand, so it must be given ~imm.
        // Passing `imm` (as before) handed MOVN a value that is not a valid
        // 16-bit chunk whenever this branch was taken.
        if (MovImm16::is_valid(~imm))
            return MOVN(xd, ~imm);
        if (detail::encode_bit_imm(imm))
            return ORR(xd, ZrReg{}, imm);

        bool movz_done = false;
        int shift_count = 0;

        // If the low word is a bitmask immediate, set it with one ORR and only
        // fill in the upper two halfwords afterwards.
        if (detail::encode_bit_imm(static_cast<std::uint32_t>(imm))) {
            ORR(xd.toW(), WzrReg{}, static_cast<std::uint32_t>(imm));
            imm >>= 32;
            movz_done = true;
            shift_count = 2;
        }

        // Emit one instruction per non-zero halfword: MOVZ for the first
        // (unless the ORR above already initialised the register), MOVK after.
        while (imm != 0) {
            const std::uint16_t hw = static_cast<std::uint16_t>(imm);
            if (hw != 0) {
                if (movz_done) {
                    MOVK(xd, {hw, static_cast<MovImm16Shift>(shift_count)});
                } else {
                    MOVZ(xd, {hw, static_cast<MovImm16Shift>(shift_count)});
                    movz_done = true;
                }
            }
            imm >>= 16;
            shift_count++;
        }
    }

private:
#include "oaknut/impl/arm64_encode_helpers.inc.hpp"

    /// Assembles one instruction word and appends it.
    /// `bs` is the bit pattern: its '1' characters give the fixed set bits, and
    /// each `bargs` entry names the pattern characters that the corresponding
    /// argument is encoded into.
    template<StringLiteral bs, StringLiteral... bargs, typename... Ts>
    void emit(Ts... args)
    {
        std::uint32_t encoding = detail::get_bits<bs, "1">();
        encoding |= (0 | ... | encode<detail::get_bits<bs, bargs>()>(std::forward<Ts>(args)));
        Policy::append(encoding);
    }

    /// Encodes an address offset into the bits selected by `splat`.
    /// An unbound label contributes 0 now and registers a writeback that fills
    /// the field in when the label is bound.
    /// NOTE(review): pdep comes from the included encode helpers; presumably it
    /// deposits the value's low bits into the set bits of `splat` — confirm.
    template<std::uint32_t splat, std::size_t size, std::size_t align>
    std::uint32_t encode(AddrOffset<size, align> v)
    {
        static_assert(std::popcount(splat) == size - align);

        const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
            const std::ptrdiff_t diff = target - current_addr;
            return pdep<splat>(AddrOffset<size, align>::encode(diff));
        };

        return std::visit(detail::overloaded{
                              [&](std::uint32_t encoding) {
                                  return pdep<splat>(encoding);
                              },
                              [&](Label* label) {
                                  if (label->m_addr) {
                                      return encode_fn(Policy::current_address(), *label->m_addr);
                                  }

                                  label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
                                  return 0u;
                              },
                              [&](void* p) {
                                  return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
                              },
                          },
                          v.m_payload);
    }

    /// Encodes a page offset into the bits selected by `splat`; unbound labels
    /// are handled as in the AddrOffset overload.
    template<std::uint32_t splat, std::size_t size>
    std::uint32_t encode(PageOffset<size> v)
    {
        static_assert(std::popcount(splat) == size);

        const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
            return pdep<splat>(PageOffset<size>::encode(current_addr, target));
        };

        return std::visit(detail::overloaded{
                              [&](Label* label) {
                                  if (label->m_addr) {
                                      return encode_fn(Policy::current_address(), *label->m_addr);
                                  }

                                  label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
                                  return 0u;
                              },
                              [&](void* p) {
                                  return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
                              },
                          },
                          v.m_payload);
    }
};
/// Storage policy that writes instruction words through a raw
/// `std::uint32_t*` cursor. It performs no bounds checking.
struct PointerCodeGeneratorPolicy {
public:
    /// Returns the current cursor reinterpreted as pointer type T.
    template<typename T>
    T ptr()
    {
        static_assert(std::is_pointer<T>::value);
        return reinterpret_cast<T>(m_ptr);
    }

    /// Moves the cursor to `ptr_`.
    void set_ptr(std::uint32_t* ptr_)
    {
        m_ptr = ptr_;
    }

protected:
    using constructor_argument_type = std::uint32_t*;

    PointerCodeGeneratorPolicy(std::uint32_t* ptr_)
        : m_ptr(ptr_)
    {}

    /// Stores one instruction word at the cursor and advances by one word.
    void append(std::uint32_t instruction)
    {
        *m_ptr = instruction;
        ++m_ptr;
    }

    /// The cursor as an integer address.
    std::uintptr_t current_address()
    {
        return reinterpret_cast<std::uintptr_t>(m_ptr);
    }

    /// Rewrites the word at `addr`: bits set in `mask` are kept, then `value`
    /// is OR-ed in.
    void set_at_address(std::uintptr_t addr, std::uint32_t value, std::uint32_t mask)
    {
        auto* const slot = reinterpret_cast<std::uint32_t*>(addr);
        const std::uint32_t kept = *slot & mask;
        *slot = kept | value;
    }

private:
    std::uint32_t* m_ptr;
};
// The default code generator: emits through a raw std::uint32_t* cursor.
using CodeGenerator = BasicCodeGenerator<PointerCodeGeneratorPolicy>;
// Ready-made operand constants, meant to be pulled in with
// `using namespace oaknut::util;`.
namespace util {
// General-purpose registers 0..30 in their 32-bit (W) and 64-bit (X) views.
// Index 31 is deliberately absent here: it is reached through the zero-register
// and stack-pointer constants below.
inline constexpr WReg W0{0}, W1{1}, W2{2}, W3{3}, W4{4}, W5{5}, W6{6}, W7{7}, W8{8}, W9{9}, W10{10}, W11{11}, W12{12}, W13{13}, W14{14}, W15{15}, W16{16}, W17{17}, W18{18}, W19{19}, W20{20}, W21{21}, W22{22}, W23{23}, W24{24}, W25{25}, W26{26}, W27{27}, W28{28}, W29{29}, W30{30};
inline constexpr XReg X0{0}, X1{1}, X2{2}, X3{3}, X4{4}, X5{5}, X6{6}, X7{7}, X8{8}, X9{9}, X10{10}, X11{11}, X12{12}, X13{13}, X14{14}, X15{15}, X16{16}, X17{17}, X18{18}, X19{19}, X20{20}, X21{21}, X22{22}, X23{23}, X24{24}, X25{25}, X26{26}, X27{27}, X28{28}, X29{29}, X30{30};
// Zero registers and stack pointers; ZR/XZR and SP/XSP are two names for the same value.
inline constexpr ZrReg ZR{}, XZR{};
inline constexpr WzrReg WZR{};
inline constexpr SpReg SP{}, XSP{};
inline constexpr WspReg WSP{};
// Vector register selectors: use e.g. V0.B16() for an arrangement or V0.S()[1] for a lane.
inline constexpr VRegSelector V0{0}, V1{1}, V2{2}, V3{3}, V4{4}, V5{5}, V6{6}, V7{7}, V8{8}, V9{9}, V10{10}, V11{11}, V12{12}, V13{13}, V14{14}, V15{15}, V16{16}, V17{17}, V18{18}, V19{19}, V20{20}, V21{21}, V22{22}, V23{23}, V24{24}, V25{25}, V26{26}, V27{27}, V28{28}, V29{29}, V30{30}, V31{31};
// Scalar views of the vector registers: 128-bit (Q), 64-bit (D), 32-bit (S), 16-bit (H), 8-bit (B).
inline constexpr QReg Q0{0}, Q1{1}, Q2{2}, Q3{3}, Q4{4}, Q5{5}, Q6{6}, Q7{7}, Q8{8}, Q9{9}, Q10{10}, Q11{11}, Q12{12}, Q13{13}, Q14{14}, Q15{15}, Q16{16}, Q17{17}, Q18{18}, Q19{19}, Q20{20}, Q21{21}, Q22{22}, Q23{23}, Q24{24}, Q25{25}, Q26{26}, Q27{27}, Q28{28}, Q29{29}, Q30{30}, Q31{31};
inline constexpr DReg D0{0}, D1{1}, D2{2}, D3{3}, D4{4}, D5{5}, D6{6}, D7{7}, D8{8}, D9{9}, D10{10}, D11{11}, D12{12}, D13{13}, D14{14}, D15{15}, D16{16}, D17{17}, D18{18}, D19{19}, D20{20}, D21{21}, D22{22}, D23{23}, D24{24}, D25{25}, D26{26}, D27{27}, D28{28}, D29{29}, D30{30}, D31{31};
inline constexpr SReg S0{0}, S1{1}, S2{2}, S3{3}, S4{4}, S5{5}, S6{6}, S7{7}, S8{8}, S9{9}, S10{10}, S11{11}, S12{12}, S13{13}, S14{14}, S15{15}, S16{16}, S17{17}, S18{18}, S19{19}, S20{20}, S21{21}, S22{22}, S23{23}, S24{24}, S25{25}, S26{26}, S27{27}, S28{28}, S29{29}, S30{30}, S31{31};
inline constexpr HReg H0{0}, H1{1}, H2{2}, H3{3}, H4{4}, H5{5}, H6{6}, H7{7}, H8{8}, H9{9}, H10{10}, H11{11}, H12{12}, H13{13}, H14{14}, H15{15}, H16{16}, H17{17}, H18{18}, H19{19}, H20{20}, H21{21}, H22{22}, H23{23}, H24{24}, H25{25}, H26{26}, H27{27}, H28{28}, H29{29}, H30{30}, H31{31};
inline constexpr BReg B0{0}, B1{1}, B2{2}, B3{3}, B4{4}, B5{5}, B6{6}, B7{7}, B8{8}, B9{9}, B10{10}, B11{11}, B12{12}, B13{13}, B14{14}, B15{15}, B16{16}, B17{17}, B18{18}, B19{19}, B20{20}, B21{21}, B22{22}, B23{23}, B24{24}, B25{25}, B26{26}, B27{27}, B28{28}, B29{29}, B30{30}, B31{31};
// Condition codes, one constant per Cond enumerator.
inline constexpr Cond EQ{Cond::EQ}, NE{Cond::NE}, CS{Cond::CS}, CC{Cond::CC}, MI{Cond::MI}, PL{Cond::PL}, VS{Cond::VS}, VC{Cond::VC}, HI{Cond::HI}, LS{Cond::LS}, GE{Cond::GE}, LT{Cond::LT}, GT{Cond::GT}, LE{Cond::LE}, AL{Cond::AL}, NV{Cond::NV}, HS{Cond::HS}, LO{Cond::LO};
// Extend and shift names. Each is a MultiTypedName, so the same identifier
// converts implicitly to every enum type listed in its template arguments.
inline constexpr auto UXTB{MultiTypedName<AddSubExt::UXTB>{}};
inline constexpr auto UXTH{MultiTypedName<AddSubExt::UXTH>{}};
inline constexpr auto UXTW{MultiTypedName<AddSubExt::UXTW, IndexExt::UXTW>{}};
inline constexpr auto UXTX{MultiTypedName<AddSubExt::UXTX>{}};
inline constexpr auto SXTB{MultiTypedName<AddSubExt::SXTB>{}};
inline constexpr auto SXTH{MultiTypedName<AddSubExt::SXTH>{}};
inline constexpr auto SXTW{MultiTypedName<AddSubExt::SXTW, IndexExt::SXTW>{}};
inline constexpr auto SXTX{MultiTypedName<AddSubExt::SXTX, IndexExt::SXTX>{}};
inline constexpr auto LSL{MultiTypedName<AddSubExt::LSL, IndexExt::LSL, AddSubShift::LSL, LogShift::LSL, LslSymbol::LSL>{}};
inline constexpr auto LSR{MultiTypedName<AddSubShift::LSR, LogShift::LSR>{}};
inline constexpr auto ASR{MultiTypedName<AddSubShift::ASR, LogShift::ASR>{}};
inline constexpr auto ROR{MultiTypedName<LogShift::ROR>{}};
// Tag values passed to select an addressing mode (see their use with STP/LDP in the tests).
inline constexpr PostIndexed POST_INDEXED{};
inline constexpr PreIndexed PRE_INDEXED{};
inline constexpr MslSymbol MSL{MslSymbol::MSL};
} // namespace util
} // namespace oaknut

117
externals/oaknut/tests/basic.cpp vendored Normal file
View file

@ -0,0 +1,117 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <cstdint>
#include <cstdio>
#include <catch2/catch_test_macros.hpp>
#include "oaknut/code_block.hpp"
#include "oaknut/oaknut.hpp"
#include "rand_int.hpp"
using namespace oaknut;
using namespace oaknut::util;
// Smoke test: emits a function that returns 42, calls it, and checks the result.
TEST_CASE("Basic Test")
{
CodeBlock mem{4096};
CodeGenerator code{mem.ptr()};
// Emission happens between unprotect() and protect().
mem.unprotect();
code.MOV(W0, 42);
code.RET();
mem.protect();
mem.invalidate_all();
// Call the start of the block as an int() function.
int result = ((int (*)())mem.ptr())();
REQUIRE(result == 42);
}
// Emits a recursive Fibonacci function and checks a few values.
// Exercises forward label references (zero, recurse, end), a backward reference
// (start), and pre-/post-indexed stack accesses.
TEST_CASE("Fibonacci")
{
CodeBlock mem{4096};
CodeGenerator code{mem.ptr()};
mem.unprotect();
// Capture the entry point before anything is emitted.
auto fib = code.ptr<int (*)(int)>();
Label start, end, zero, recurse;
code.l(start);
// Prologue: reserve 32 bytes and save X29/X30 and X20/X19.
code.STP(X29, X30, SP, PRE_INDEXED, -32);
code.STP(X20, X19, SP, 16);
code.MOV(X29, SP);
// Keep the argument n in W19, then branch on n - 1.
code.MOV(W19, W0);
code.SUBS(W0, W0, 1);
code.B(LT, zero);
code.B(NE, recurse);
// n == 1: return 1.
code.MOV(W0, 1);
code.B(end);
// n < 1: return 0.
code.l(zero);
code.MOV(W0, WZR);
code.B(end);
// n > 1: W0 already holds n - 1; compute fib(n - 1) + fib(n - 2).
code.l(recurse);
code.BL(start);
code.MOV(W20, W0);
code.SUB(W0, W19, 2);
code.BL(start);
code.ADD(W0, W0, W20);
// Epilogue: restore the saved registers and release the frame.
code.l(end);
code.LDP(X20, X19, SP, 16);
code.LDP(X29, X30, SP, POST_INDEXED, 32);
code.RET();
mem.protect();
mem.invalidate_all();
REQUIRE(fib(0) == 0);
REQUIRE(fib(1) == 1);
REQUIRE(fib(5) == 5);
REQUIRE(fib(9) == 34);
}
// Checks MOV(WReg, imm) against 0x100000 random 32-bit values: each iteration
// re-emits `MOV W0, value; RET` at the start of the block and runs it.
TEST_CASE("Immediate generation (32-bit)")
{
CodeBlock mem{4096};
for (int i = 0; i < 0x100000; i++) {
const std::uint32_t value = RandInt<std::uint32_t>(0, 0xffffffff);
// A fresh generator restarts emission at the beginning of the block.
CodeGenerator code{mem.ptr()};
// The result is read back as 64 bits and compared with the 32-bit value.
auto f = code.ptr<std::uint64_t (*)()>();
mem.unprotect();
code.MOV(W0, value);
code.RET();
mem.protect();
mem.invalidate_all();
REQUIRE(f() == value);
}
}
// Checks MOV(XReg, imm) against 0x100000 random 64-bit values: each iteration
// re-emits `MOV X0, value; RET` at the start of the block and runs it.
TEST_CASE("Immediate generation (64-bit)")
{
CodeBlock mem{4096};
for (int i = 0; i < 0x100000; i++) {
const std::uint64_t value = RandInt<std::uint64_t>(0, 0xffffffff'ffffffff);
// A fresh generator restarts emission at the beginning of the block.
CodeGenerator code{mem.ptr()};
auto f = code.ptr<std::uint64_t (*)()>();
mem.unprotect();
code.MOV(X0, value);
code.RET();
mem.protect();
mem.invalidate_all();
REQUIRE(f() == value);
}
}

785
externals/oaknut/tests/fpsimd.cpp vendored Normal file
View file

@ -0,0 +1,785 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <array>
#include <cstdint>
#include <catch2/catch_test_macros.hpp>
#include "oaknut/oaknut.hpp"
// T(HEX, CMD): defines a Catch2 test case named after the text of CMD.
// The test points a CodeGenerator at a single std::uint32_t, runs `code.CMD`
// (expected to emit exactly one instruction word into it) and requires that
// word to equal HEX.
#define T(HEX, CMD) \
TEST_CASE(#CMD) \
{ \
using namespace oaknut; \
using namespace oaknut::util; \
\
std::uint32_t result; \
CodeGenerator code{&result}; \
\
code.CMD; \
\
REQUIRE(result == HEX); \
}
T(0x5ee0bb61, ABS(D1, D27))
T(0x4e20ba03, ABS(V3.B16(), V16.B16()))
T(0x5ef98449, ADD(D9, D2, D25))
T(0x4eef8697, ADD(V23.D2(), V20.D2(), V15.D2()))
T(0x0eb743d1, ADDHN(V17.S2(), V30.D2(), V23.D2()))
T(0x5ef1b933, ADDP(D19, V9.D2()))
T(0x0e7ebf6e, ADDP(V14.H4(), V27.H4(), V30.H4()))
T(0x4e31ba47, ADDV(B7, V18.B16()))
// AESD
// AESE
// AESIMC
// AESMC
T(0x4e2b1d4c, AND(V12.B16(), V10.B16(), V11.B16()))
T(0x6f01b7f4, BIC(V20.H8(), 63, LSL, 8))
T(0x2f017752, BIC(V18.S2(), 58, LSL, 24))
T(0x0e751c85, BIC(V5.B8(), V4.B8(), V21.B8()))
T(0x2ef11d4d, BIF(V13.B8(), V10.B8(), V17.B8()))
T(0x2eb31f3b, BIT(V27.B8(), V25.B8(), V19.B8()))
T(0x2e711ed8, BSL(V24.B8(), V22.B8(), V17.B8()))
T(0x0e604aaf, CLS(V15.H4(), V21.H4()))
T(0x6e604808, CLZ(V8.H8(), V0.H8()))
T(0x7eff8ec2, CMEQ(D2, D22, D31))
T(0x2e2b8d57, CMEQ(V23.B8(), V10.B8(), V11.B8()))
T(0x5ee09bbf, CMEQ(D31, D29, 0))
T(0x4ea09876, CMEQ(V22.S4(), V3.S4(), 0))
T(0x5ef23c04, CMGE(D4, D0, D18))
T(0x4e203c6f, CMGE(V15.B16(), V3.B16(), V0.B16()))
T(0x7ee08822, CMGE(D2, D1, 0))
T(0x2ea08bb9, CMGE(V25.S2(), V29.S2(), 0))
T(0x5ef036a5, CMGT(D5, D21, D16))
T(0x0eb7358b, CMGT(V11.S2(), V12.S2(), V23.S2()))
T(0x5ee08957, CMGT(D23, D10, 0))
T(0x4ea088eb, CMGT(V11.S4(), V7.S4(), 0))
T(0x7ee235f2, CMHI(D18, D15, D2))
T(0x6e243596, CMHI(V22.B16(), V12.B16(), V4.B16()))
T(0x7ef23faf, CMHS(D15, D29, D18))
T(0x2e2d3d8a, CMHS(V10.B8(), V12.B8(), V13.B8()))
T(0x7ee098a4, CMLE(D4, D5, 0))
T(0x2e2098d3, CMLE(V19.B8(), V6.B8(), 0))
T(0x5ee0a980, CMLT(D0, D12, 0))
T(0x4e60a892, CMLT(V18.H8(), V4.H8(), 0))
T(0x5ee18e03, CMTST(D3, D16, D1))
T(0x4e708f65, CMTST(V5.H8(), V27.H8(), V16.H8()))
T(0x4e20598d, CNT(V13.B16(), V12.B16()))
// DUP
T(0x0e0d06cd, DUP(V13.B8(), V22.B()[6]))
T(0x0e010fe7, DUP(V7.B8(), WZR))
T(0x2e2b1e6e, EOR(V14.B8(), V19.B8(), V11.B8()))
T(0x6e1c0a35, EXT(V21.B16(), V17.B16(), V28.B16(), 1))
T(0x7ea0d7a4, FABD(S4, S29, S0))
T(0x6eecd418, FABD(V24.D2(), V0.D2(), V12.D2()))
T(0x1e20c299, FABS(S25, S20))
T(0x1e60c114, FABS(D20, D8))
T(0x4ee0f999, FABS(V25.D2(), V12.D2()))
T(0x7e71ef5b, FACGE(D27, D26, D17))
T(0x6e6eed17, FACGE(V23.D2(), V8.D2(), V14.D2()))
T(0x7ef8efc0, FACGT(D0, D30, D24))
T(0x6eb0ec54, FACGT(V20.S4(), V2.S4(), V16.S4()))
T(0x1e242b23, FADD(S3, S25, S4))
T(0x1e672a8b, FADD(D11, D20, D7))
T(0x4e34d46b, FADD(V11.S4(), V3.S4(), V20.S4()))
T(0x7e30db16, FADDP(S22, V24.S2()))
T(0x6e21d626, FADDP(V6.S4(), V17.S4(), V1.S4()))
T(0x1e23c4ce, FCCMP(S6, S3, 14, GT))
T(0x1e7104aa, FCCMP(D5, D17, 10, EQ))
T(0x1e28c678, FCCMPE(S19, S8, 8, GT))
T(0x1e6195dd, FCCMPE(D14, D1, 13, LS))
T(0x5e31e659, FCMEQ(S25, S18, S17))
T(0x0e27e7ba, FCMEQ(V26.S2(), V29.S2(), V7.S2()))
T(0x5ea0da1d, FCMEQ(S29, S16, 0.0))
T(0x4ee0db8a, FCMEQ(V10.D2(), V28.D2(), 0.0))
T(0x7e2de473, FCMGE(S19, S3, S13))
T(0x2e33e726, FCMGE(V6.S2(), V25.S2(), V19.S2()))
T(0x7ea0c8d3, FCMGE(S19, S6, 0.0))
T(0x6ea0ca7b, FCMGE(V27.S4(), V19.S4(), 0.0))
T(0x7eb7e65d, FCMGT(S29, S18, S23))
T(0x6ef0e6ac, FCMGT(V12.D2(), V21.D2(), V16.D2()))
T(0x5ee0cb5a, FCMGT(D26, D26, 0.0))
T(0x4ea0c917, FCMGT(V23.S4(), V8.S4(), 0.0))
T(0x7ea0dbe1, FCMLE(S1, S31, 0.0))
T(0x6ea0da69, FCMLE(V9.S4(), V19.S4(), 0.0))
T(0x5ea0ea5f, FCMLT(S31, S18, 0.0))
T(0x4ee0e8de, FCMLT(V30.D2(), V6.D2(), 0.0))
T(0x1e322040, FCMP(S2, S18))
T(0x1e202248, FCMP(S18, 0.0))
T(0x1e6520a0, FCMP(D5, D5))
T(0x1e602108, FCMP(D8, 0.0))
T(0x1e332370, FCMPE(S27, S19))
T(0x1e202018, FCMPE(S0, 0.0))
T(0x1e7120b0, FCMPE(D5, D17))
T(0x1e602298, FCMPE(D20, 0.0))
T(0x1e32ed68, FCSEL(S8, S11, S18, AL))
T(0x1e7b1e21, FCSEL(D1, D17, D27, NE))
T(0x1ee24022, FCVT(S2, H1))
T(0x1ee2c33c, FCVT(D28, H25))
T(0x1e23c379, FCVT(H25, S27))
T(0x1e22c1a5, FCVT(D5, S13))
T(0x1e63c2b1, FCVT(H17, D21))
T(0x1e624309, FCVT(S9, D24))
T(0x1e2400a0, FCVTAS(W0, S5))
T(0x9e24000e, FCVTAS(X14, S0))
T(0x1e640191, FCVTAS(W17, D12))
T(0x9e6403d6, FCVTAS(X22, D30))
T(0x5e21c8a7, FCVTAS(S7, S5))
T(0x0e21c8df, FCVTAS(V31.S2(), V6.S2()))
T(0x1e25036b, FCVTAU(W11, S27))
T(0x9e25030c, FCVTAU(X12, S24))
T(0x1e65002e, FCVTAU(W14, D1))
T(0x9e65003e, FCVTAU(X30, D1))
T(0x7e61cabd, FCVTAU(D29, D21))
T(0x2e21c880, FCVTAU(V0.S2(), V4.S2()))
T(0x4e217b66, FCVTL2(V6.S4(), V27.H8()))
T(0x1e30016d, FCVTMS(W13, S11))
T(0x9e3002b5, FCVTMS(X21, S21))
T(0x1e7003dd, FCVTMS(W29, D30))
T(0x9e700080, FCVTMS(X0, D4))
T(0x5e21b9b6, FCVTMS(S22, S13))
T(0x4e61ba4e, FCVTMS(V14.D2(), V18.D2()))
T(0x1e31002d, FCVTMU(W13, S1))
T(0x9e310281, FCVTMU(X1, S20))
T(0x1e71000e, FCVTMU(W14, D0))
T(0x9e710010, FCVTMU(X16, D0))
T(0x7e61bb3b, FCVTMU(D27, D25))
T(0x2e21b918, FCVTMU(V24.S2(), V8.S2()))
T(0x0e616a68, FCVTN(V8.S2(), V19.D2()))
T(0x1e200100, FCVTNS(W0, S8))
T(0x9e20037f, FCVTNS(XZR, S27))
T(0x1e60015e, FCVTNS(W30, D10))
T(0x9e600018, FCVTNS(X24, D0))
T(0x5e61a846, FCVTNS(D6, D2))
T(0x4e21aa81, FCVTNS(V1.S4(), V20.S4()))
T(0x1e210248, FCVTNU(W8, S18))
T(0x9e2103da, FCVTNU(X26, S30))
T(0x1e610120, FCVTNU(W0, D9))
T(0x9e61013a, FCVTNU(X26, D9))
T(0x7e61aaba, FCVTNU(D26, D21))
T(0x6e21aa16, FCVTNU(V22.S4(), V16.S4()))
T(0x1e28010d, FCVTPS(W13, S8))
T(0x9e2803df, FCVTPS(XZR, S30))
T(0x1e6802e9, FCVTPS(W9, D23))
T(0x9e6801f7, FCVTPS(X23, D15))
T(0x5ee1a986, FCVTPS(D6, D12))
T(0x4ea1aa32, FCVTPS(V18.S4(), V17.S4()))
T(0x1e29022b, FCVTPU(W11, S17))
T(0x9e290381, FCVTPU(X1, S28))
T(0x1e690095, FCVTPU(W21, D4))
T(0x9e6902b3, FCVTPU(X19, D21))
T(0x7ea1abbb, FCVTPU(S27, S29))
T(0x6ee1ab06, FCVTPU(V6.D2(), V24.D2()))
T(0x7e61687a, FCVTXN(S26, D3))
T(0x2e61694c, FCVTXN(V12.S2(), V10.D2()))
T(0x1e18c4d6, FCVTZS(W22, S6, 15))
T(0x9e18d131, FCVTZS(X17, S9, 12))
T(0x1e58fd9b, FCVTZS(W27, D12, 1))
T(0x9e5899ee, FCVTZS(X14, D15, 26))
T(0x1e380091, FCVTZS(W17, S4))
T(0x9e380289, FCVTZS(X9, S20))
T(0x1e780117, FCVTZS(W23, D8))
T(0x9e7800f5, FCVTZS(X21, D7))
T(0x5f2fffdb, FCVTZS(S27, S30, 17))
T(0x4f65ff65, FCVTZS(V5.D2(), V27.D2(), 27))
T(0x5ee1b932, FCVTZS(D18, D9))
T(0x4ee1ba41, FCVTZS(V1.D2(), V18.D2()))
T(0x1e19b5d8, FCVTZU(W24, S14, 19))
T(0x9e199462, FCVTZU(X2, S3, 27))
T(0x1e59fca1, FCVTZU(W1, D5, 1))
T(0x9e599bbd, FCVTZU(X29, D29, 26))
T(0x1e3900f6, FCVTZU(W22, S7))
T(0x9e3900b7, FCVTZU(X23, S5))
T(0x1e79031a, FCVTZU(W26, D24))
T(0x9e790248, FCVTZU(X8, D18))
T(0x7f5afd37, FCVTZU(D23, D9, 38))
T(0x2f34fd38, FCVTZU(V24.S2(), V9.S2(), 12))
T(0x7ea1baa3, FCVTZU(S3, S21))
T(0x6ee1b8c1, FCVTZU(V1.D2(), V6.D2()))
T(0x1e3d1999, FDIV(S25, S12, S29))
T(0x1e7e1a4e, FDIV(D14, D18, D30))
T(0x2e2cfe45, FDIV(V5.S2(), V18.S2(), V12.S2()))
T(0x1f114362, FMADD(S2, S27, S17, S16))
T(0x1f482240, FMADD(D0, D18, D8, D8))
T(0x1e234b5f, FMAX(S31, S26, S3))
T(0x1e694894, FMAX(D20, D4, D9))
T(0x4e29f568, FMAX(V8.S4(), V11.S4(), V9.S4()))
T(0x1e2f6a40, FMAXNM(S0, S18, S15))
T(0x1e6d6a99, FMAXNM(D25, D20, D13))
T(0x4e2dc6da, FMAXNM(V26.S4(), V22.S4(), V13.S4()))
T(0x7e30c9b9, FMAXNMP(S25, V13.S2()))
T(0x6e36c794, FMAXNMP(V20.S4(), V28.S4(), V22.S4()))
T(0x6e30c8f6, FMAXNMV(S22, V7.S4()))
T(0x7e30f8dd, FMAXP(S29, V6.S2()))
T(0x6e61f4ab, FMAXP(V11.D2(), V5.D2(), V1.D2()))
T(0x6e30fb85, FMAXV(S5, V28.S4()))
T(0x1e3c5aae, FMIN(S14, S21, S28))
T(0x1e7f58f8, FMIN(D24, D7, D31))
T(0x0eb0f63b, FMIN(V27.S2(), V17.S2(), V16.S2()))
T(0x1e317886, FMINNM(S6, S4, S17))
T(0x1e6e7a5d, FMINNM(D29, D18, D14))
T(0x4ea4c44c, FMINNM(V12.S4(), V2.S4(), V4.S4()))
T(0x7ef0c895, FMINNMP(D21, V4.D2()))
T(0x6efbc4e3, FMINNMP(V3.D2(), V7.D2(), V27.D2()))
T(0x6eb0c93d, FMINNMV(S29, V9.S4()))
T(0x7ef0fa13, FMINP(D19, V16.D2()))
T(0x2eb4f4ac, FMINP(V12.S2(), V5.S2(), V20.S2()))
T(0x6eb0f801, FMINV(S1, V0.S4()))
T(0x5f8219a6, FMLA(S6, S13, V2.S()[2]))
T(0x4fc512a1, FMLA(V1.D2(), V21.D2(), V5.D()[0]))
T(0x4e6bcecf, FMLA(V15.D2(), V22.D2(), V11.D2()))
T(0x5f8a5094, FMLS(S20, S4, V10.S()[0]))
T(0x4fd85b79, FMLS(V25.D2(), V27.D2(), V24.D()[1]))
T(0x0ebacca4, FMLS(V4.S2(), V5.S2(), V26.S2()))
T(0x1e270027, FMOV(S7, W1))
T(0x1e260164, FMOV(W4, S11))
T(0x9e670008, FMOV(D8, X0))
T(0x9eaf03e0, FMOV(V0.D()[1], XZR))
T(0x9e660090, FMOV(X16, D4))
T(0x9eae025f, FMOV(XZR, V18.D()[1]))
T(0x1e204079, FMOV(S25, S3))
T(0x1e6042f8, FMOV(D24, D23))
T(0x1e32f01c, FMOV(S28, FImm8{true, 0b001, 0b0111})) // -5.75
T(0x1e74901e, FMOV(D30, FImm8{true, 0b010, 0b0100})) // -10.0
T(0x0f03f51a, FMOV(V26.S2(), FImm8{false, 0b110, 0b1000})) // 0.75
T(0x6f02f58e, FMOV(V14.D2(), FImm8{false, 0b100, 0b1100})) // 0.21875
T(0x1f0adaf5, FMSUB(S21, S23, S10, S22))
T(0x1f5da840, FMSUB(D0, D2, D29, D10))
T(0x5fa39bba, FMUL(S26, S29, V3.S()[3]))
T(0x4fb89ad2, FMUL(V18.S4(), V22.S4(), V24.S()[3]))
T(0x1e2b0a3c, FMUL(S28, S17, S11))
T(0x1e720933, FMUL(D19, D9, D18))
T(0x6e7edfa3, FMUL(V3.D2(), V29.D2(), V30.D2()))
T(0x5e32dee6, FMULX(S6, S23, S18))
T(0x0e27deec, FMULX(V12.S2(), V23.S2(), V7.S2()))
T(0x7f879a1f, FMULX(S31, S16, V7.S()[2]))
T(0x6fce9836, FMULX(V22.D2(), V1.D2(), V14.D()[1]))
T(0x1e2142cc, FNEG(S12, S22))
T(0x1e61434b, FNEG(D11, D26))
T(0x6ea0fb90, FNEG(V16.S4(), V28.S4()))
T(0x1f361be5, FNMADD(S5, S31, S22, S6))
T(0x1f7a316d, FNMADD(D13, D11, D26, D12))
T(0x1f3e9957, FNMSUB(S23, S10, S30, S6))
T(0x1f79da66, FNMSUB(D6, D19, D25, D22))
T(0x1e208ab5, FNMUL(S21, S21, S0))
T(0x1e6f89eb, FNMUL(D11, D15, D15))
T(0x5ea1da18, FRECPE(S24, S16))
T(0x0ea1d9df, FRECPE(V31.S2(), V14.S2()))
T(0x5e2dfe37, FRECPS(S23, S17, S13))
T(0x0e29fcec, FRECPS(V12.S2(), V7.S2(), V9.S2()))
T(0x5ee1f998, FRECPX(D24, D12))
T(0x1e264106, FRINTA(S6, S8))
T(0x1e664376, FRINTA(D22, D27))
T(0x6e6188a9, FRINTA(V9.D2(), V5.D2()))
T(0x1e27c216, FRINTI(S22, S16))
T(0x1e67c071, FRINTI(D17, D3))
T(0x6ea19b9d, FRINTI(V29.S4(), V28.S4()))
T(0x1e25413e, FRINTM(S30, S9))
T(0x1e6541a1, FRINTM(D1, D13))
T(0x4e619ad8, FRINTM(V24.D2(), V22.D2()))
T(0x1e244098, FRINTN(S24, S4))
T(0x1e6440b4, FRINTN(D20, D5))
T(0x4e618835, FRINTN(V21.D2(), V1.D2()))
T(0x1e24c188, FRINTP(S8, S12))
T(0x1e64c292, FRINTP(D18, D20))
T(0x0ea18a69, FRINTP(V9.S2(), V19.S2()))
T(0x1e274146, FRINTX(S6, S10))
T(0x1e674333, FRINTX(D19, D25))
T(0x6e619902, FRINTX(V2.D2(), V8.D2()))
T(0x1e25c2b2, FRINTZ(S18, S21))
T(0x1e65c008, FRINTZ(D8, D0))
T(0x0ea19918, FRINTZ(V24.S2(), V8.S2()))
T(0x7ea1dbdb, FRSQRTE(S27, S30))
T(0x6ee1d8df, FRSQRTE(V31.D2(), V6.D2()))
T(0x5ee0ff40, FRSQRTS(D0, D26, D0))
T(0x4eb6fe31, FRSQRTS(V17.S4(), V17.S4(), V22.S4()))
T(0x1e21c204, FSQRT(S4, S16))
T(0x1e61c31c, FSQRT(D28, D24))
T(0x6ea1fa1f, FSQRT(V31.S4(), V16.S4()))
T(0x1e273b28, FSUB(S8, S25, S7))
T(0x1e6139b9, FSUB(D25, D13, D1))
T(0x0eadd6b0, FSUB(V16.S2(), V21.S2(), V13.S2()))
// INS
// INS
T(0x0c407b24, LD1(List{V4.S2()}, X25))
T(0x4c40a891, LD1(List{V17.S4(), V18.S4()}, X4))
T(0x0c406d31, LD1(List{V17.D1(), V18.D1(), V19.D1()}, X9))
T(0x4c402b00, LD1(List{V0.S4(), V1.S4(), V2.S4(), V3.S4()}, X24))
T(0x4cdf72c8, LD1(List{V8.B16()}, X22, POST_INDEXED, 16))
T(0x0cd67504, LD1(List{V4.H4()}, X8, POST_INDEXED, X22))
T(0x0cdfaeb7, LD1(List{V23.D1(), V24.D1()}, X21, POST_INDEXED, 16))
T(0x0cd0a837, LD1(List{V23.S2(), V24.S2()}, X1, POST_INDEXED, X16))
T(0x4cdf6d36, LD1(List{V22.D2(), V23.D2(), V24.D2()}, X9, POST_INDEXED, 48))
T(0x0cdc685b, LD1(List{V27.S2(), V28.S2(), V29.S2()}, X2, POST_INDEXED, X28))
T(0x0cdf2ebc, LD1(List{V28.D1(), V29.D1(), V30.D1(), V31.D1()}, X21, POST_INDEXED, 32))
T(0x0cc0260c, LD1(List{V12.H4(), V13.H4(), V14.H4(), V15.H4()}, X16, POST_INDEXED, X0))
T(0x0d400665, LD1(List{V5.B()}[1], X19))
T(0x0d4041da, LD1(List{V26.H()}[0], X14))
T(0x0d40815b, LD1(List{V27.S()}[0], X10))
T(0x0d408755, LD1(List{V21.D()}[0], X26))
T(0x4ddf0966, LD1(List{V6.B()}[10], X11, POST_INDEXED, 1))
T(0x4dcc1951, LD1(List{V17.B()}[14], X10, POST_INDEXED, X12))
T(0x0ddf58cf, LD1(List{V15.H()}[3], X6, POST_INDEXED, 2))
T(0x0dd14a3d, LD1(List{V29.H()}[1], X17, POST_INDEXED, X17))
T(0x0ddf8072, LD1(List{V18.S()}[0], X3, POST_INDEXED, 4))
T(0x4dcb90bb, LD1(List{V27.S()}[3], X5, POST_INDEXED, X11))
T(0x4ddf8537, LD1(List{V23.D()}[1], X9, POST_INDEXED, 8))
T(0x0dcf8784, LD1(List{V4.D()}[0], X28, POST_INDEXED, X15))
T(0x0d40c0f1, LD1R(List{V17.B8()}, X7))
T(0x0ddfceac, LD1R(List{V12.D1()}, X21, POST_INDEXED, 8))
T(0x4dd5c9c2, LD1R(List{V2.S4()}, X14, POST_INDEXED, X21))
T(0x0c408bc8, LD2(List{V8.S2(), V9.S2()}, X30))
T(0x0cdf842a, LD2(List{V10.H4(), V11.H4()}, X1, POST_INDEXED, 16))
T(0x0cd58678, LD2(List{V24.H4(), V25.H4()}, X19, POST_INDEXED, X21))
T(0x0d60132f, LD2(List{V15.B(), V16.B()}[4], X25))
T(0x4d605156, LD2(List{V22.H(), V23.H()}[6], X10))
T(0x0d609293, LD2(List{V19.S(), V20.S()}[1], X20))
T(0x4d608599, LD2(List{V25.D(), V26.D()}[1], X12))
T(0x4dff0bd6, LD2(List{V22.B(), V23.B()}[10], X30, POST_INDEXED, 2))
T(0x0df90bab, LD2(List{V11.B(), V12.B()}[2], X29, POST_INDEXED, X25))
T(0x4dff42c3, LD2(List{V3.H(), V4.H()}[4], X22, POST_INDEXED, 4))
T(0x4dfa5816, LD2(List{V22.H(), V23.H()}[7], X0, POST_INDEXED, X26))
T(0x4dff9372, LD2(List{V18.S(), V19.S()}[3], X27, POST_INDEXED, 8))
T(0x4de483c0, LD2(List{V0.S(), V1.S()}[2], X30, POST_INDEXED, X4))
T(0x4dff8714, LD2(List{V20.D(), V21.D()}[1], X24, POST_INDEXED, 16))
T(0x4dfa854d, LD2(List{V13.D(), V14.D()}[1], X10, POST_INDEXED, X26))
T(0x4d60ca33, LD2R(List{V19.S4(), V20.S4()}, X17))
T(0x0dffc777, LD2R(List{V23.H4(), V24.H4()}, X27, POST_INDEXED, 4))
T(0x4de9c3cd, LD2R(List{V13.B16(), V14.B16()}, X30, POST_INDEXED, X9))
T(0x0c404032, LD3(List{V18.B8(), V19.B8(), V20.B8()}, X1))
T(0x0cdf4bc8, LD3(List{V8.S2(), V9.S2(), V10.S2()}, X30, POST_INDEXED, 24))
T(0x4ccb4960, LD3(List{V0.S4(), V1.S4(), V2.S4()}, X11, POST_INDEXED, X11))
T(0x0d40217c, LD3(List{V28.B(), V29.B(), V30.B()}[0], X11))
T(0x4d407a38, LD3(List{V24.H(), V25.H(), V26.H()}[7], X17))
T(0x4d40a119, LD3(List{V25.S(), V26.S(), V27.S()}[2], X8))
T(0x0d40a6bb, LD3(List{V27.D(), V28.D(), V29.D()}[0], X21))
T(0x4ddf2bb1, LD3(List{V17.B(), V18.B(), V19.B()}[10], X29, POST_INDEXED, 3))
T(0x4dc13519, LD3(List{V25.B(), V26.B(), V27.B()}[13], X8, POST_INDEXED, X1))
T(0x4ddf6b3f, LD3(List{V31.H(), V0.H(), V1.H()}[5], X25, POST_INDEXED, 6))
T(0x4dc16243, LD3(List{V3.H(), V4.H(), V5.H()}[4], X18, POST_INDEXED, X1))
T(0x4ddfa329, LD3(List{V9.S(), V10.S(), V11.S()}[2], X25, POST_INDEXED, 12))
T(0x4ddab328, LD3(List{V8.S(), V9.S(), V10.S()}[3], X25, POST_INDEXED, X26))
T(0x4ddfa4e4, LD3(List{V4.D(), V5.D(), V6.D()}[1], X7, POST_INDEXED, 24))
T(0x0ddba58c, LD3(List{V12.D(), V13.D(), V14.D()}[0], X12, POST_INDEXED, X27))
T(0x0d40e3b3, LD3R(List{V19.B8(), V20.B8(), V21.B8()}, X29))
T(0x0ddfe2f3, LD3R(List{V19.B8(), V20.B8(), V21.B8()}, X23, POST_INDEXED, 3))
T(0x0ddbe8e4, LD3R(List{V4.S2(), V5.S2(), V6.S2()}, X7, POST_INDEXED, X27))
T(0x4c400a69, LD4(List{V9.S4(), V10.S4(), V11.S4(), V12.S4()}, X19))
T(0x0cdf0bea, LD4(List{V10.S2(), V11.S2(), V12.S2(), V13.S2()}, SP, POST_INDEXED, 32))
T(0x4cd705ad, LD4(List{V13.H8(), V14.H8(), V15.H8(), V16.H8()}, X13, POST_INDEXED, X23))
T(0x0d603b97, LD4(List{V23.B(), V24.B(), V25.B(), V26.B()}[6], X28))
T(0x0d606941, LD4(List{V1.H(), V2.H(), V3.H(), V4.H()}[1], X10))
T(0x0d60a039, LD4(List{V25.S(), V26.S(), V27.S(), V28.S()}[0], X1))
T(0x4d60a4c5, LD4(List{V5.D(), V6.D(), V7.D(), V8.D()}[1], X6))
T(0x0dff2139, LD4(List{V25.B(), V26.B(), V27.B(), V28.B()}[0], X9, POST_INDEXED, 4))
T(0x4df32513, LD4(List{V19.B(), V20.B(), V21.B(), V22.B()}[9], X8, POST_INDEXED, X19))
T(0x0dff7b45, LD4(List{V5.H(), V6.H(), V7.H(), V8.H()}[3], X26, POST_INDEXED, 8))
T(0x0dfa6839, LD4(List{V25.H(), V26.H(), V27.H(), V28.H()}[1], X1, POST_INDEXED, X26))
T(0x4dffa176, LD4(List{V22.S(), V23.S(), V24.S(), V25.S()}[2], X11, POST_INDEXED, 16))
T(0x4de0a125, LD4(List{V5.S(), V6.S(), V7.S(), V8.S()}[2], X9, POST_INDEXED, X0))
T(0x0dffa4ab, LD4(List{V11.D(), V12.D(), V13.D(), V14.D()}[0], X5, POST_INDEXED, 32))
T(0x0dfba784, LD4(List{V4.D(), V5.D(), V6.D(), V7.D()}[0], X28, POST_INDEXED, X27))
T(0x4d60ef82, LD4R(List{V2.D2(), V3.D2(), V4.D2(), V5.D2()}, X28))
T(0x0dffef23, LD4R(List{V3.D1(), V4.D1(), V5.D1(), V6.D1()}, X25, POST_INDEXED, 32))
T(0x4df5e36a, LD4R(List{V10.B16(), V11.B16(), V12.B16(), V13.B16()}, X27, POST_INDEXED, X21))
T(0x2c6dde58, LDNP(S24, S23, X18, -148))
T(0x6c5f8ad5, LDNP(D21, D2, X22, 504))
T(0xac793251, LDNP(Q17, Q12, X18, -224))
T(0x2cf1b345, LDP(S5, S12, X26, POST_INDEXED, -116))
T(0x6cc9489a, LDP(D26, D18, X4, POST_INDEXED, 144))
T(0xace34b69, LDP(Q9, Q18, X27, POST_INDEXED, -928))
T(0x2dca159f, LDP(S31, S5, X12, PRE_INDEXED, 80))
T(0x6df9682d, LDP(D13, D26, X1, PRE_INDEXED, -112))
T(0xadc7566f, LDP(Q15, Q21, X19, PRE_INDEXED, 224))
T(0x2d4efb01, LDP(S1, S30, X24, 116))
T(0x6d710b5a, LDP(D26, D2, X26, -240))
T(0xad74fbb0, LDP(Q16, Q30, X29, -368))
T(0x3c5b76a9, LDR(B9, X21, POST_INDEXED, -73))
T(0x7c5fd798, LDR(H24, X28, POST_INDEXED, -3))
T(0xbc4336b6, LDR(S22, X21, POST_INDEXED, 51))
T(0xfc53b4d5, LDR(D21, X6, POST_INDEXED, -197))
T(0x3cdf571d, LDR(Q29, X24, POST_INDEXED, -11))
T(0x3c5baf77, LDR(B23, X27, PRE_INDEXED, -70))
T(0x7c41bc79, LDR(H25, X3, PRE_INDEXED, 27))
T(0xbc48ecb2, LDR(S18, X5, PRE_INDEXED, 142))
T(0xfc4b1dee, LDR(D14, X15, PRE_INDEXED, 177))
T(0x3cc31c6a, LDR(Q10, X3, PRE_INDEXED, 49))
T(0x3d5a0ef6, LDR(B22, X23, 1667))
T(0x7d5d8dd7, LDR(H23, X14, 3782))
T(0xbd55d41a, LDR(S26, X0, 5588))
T(0xfd58c566, LDR(D6, X11, 12680))
T(0x3dce966e, LDR(Q14, X19, 14928))
T(0x1c8599c0, LDR(S0, -1002696))
T(0x5c8a1ca4, LDR(D4, -965740))
T(0x9cfd90fa, LDR(Q26, -19940))
T(0x3c634a12, LDR(B18, X16, W3, UXTW))
T(0x3c7368e7, LDR(B7, X7, X19, LSL, 0))
T(0x7c646a38, LDR(H24, X17, X4))
T(0xbc727bda, LDR(S26, X30, X18, LSL, 2))
T(0xfc63eb36, LDR(D22, X25, X3, SXTX))
T(0x3ce2ca06, LDR(Q6, X16, W2, SXTW))
T(0x3c4233e6, LDUR(B6, SP, 35))
T(0x7c4d52f1, LDUR(H17, X23, 213))
T(0xbc5be12f, LDUR(S15, X9, -66))
T(0xfc474197, LDUR(D23, X12, 116))
T(0x3cd703db, LDUR(Q27, X30, -144))
T(0x2f9a0354, MLA(V20.S2(), V26.S2(), V26.S()[0]))
T(0x4e7e9643, MLA(V3.H8(), V18.H8(), V30.H8()))
T(0x2f80484e, MLS(V14.S2(), V2.S2(), V0.S()[2]))
T(0x6ebb9572, MLS(V18.S4(), V11.S4(), V27.S4()))
T(0x6e135ec1, MOV(V1.B()[9], V22.B()[11]))
T(0x4e0f1da9, MOV(V9.B()[7], W13))
T(0x5e0e045d, MOV(H29, V2.H()[3]))
T(0x0e043ca1, MOV(W1, V5.S()[0]))
T(0x4e083df7, MOV(X23, V15.D()[0]))
// MOV
T(0x0f06e58e, MOVI(V14.B8(), 204))
T(0x4f058559, MOVI(V25.H8(), 170))
T(0x0f030565, MOVI(V5.S2(), 107))
T(0x0f05c4dc, MOVI(V28.S2(), 166, MSL, 8))
T(0x2f07e47e, MOVI(D30, RepImm{0b11100011})) // 0xFFFFFF000000FFFF (each imm8 bit expands to one byte, MSB first)
T(0x6f03e65b, MOVI(V27.D2(), RepImm{0b01110010})) // 0x00FFFFFF0000FF00 in each 64-bit lane
T(0x0f9e813e, MUL(V30.S2(), V9.S2(), V30.S()[0]))
T(0x4ea59f8e, MUL(V14.S4(), V28.S4(), V5.S4()))
T(0x2e205acd, MVN(V13.B8(), V22.B8()))
T(0x2f0084e1, MVNI(V1.H4(), 7))
T(0x6f026602, MVNI(V2.S4(), 80, LSL, 24))
T(0x2f03c71a, MVNI(V26.S2(), 120, MSL, 8))
T(0x7ee0ba9e, NEG(D30, D20))
T(0x2ea0b9f7, NEG(V23.S2(), V15.S2()))
// NOT
T(0x4ef81f0f, ORN(V15.B16(), V24.B16(), V24.B16()))
T(0x4f03b4e0, ORR(V0.H8(), 103, LSL, 8))
T(0x4f043508, ORR(V8.S4(), 136, LSL, 8))
T(0x4eb21c9c, ORR(V28.B16(), V4.B16(), V18.B16()))
T(0x2e279d77, PMUL(V23.B8(), V11.B8(), V7.B8()))
T(0x4e27e299, PMULL2(V25.H8(), V20.B16(), V7.B16()))
T(0x2eab4048, RADDHN(V8.S2(), V2.D2(), V11.D2()))
T(0x6e605b7e, RBIT(V30.B16(), V27.B16()))
T(0x0e201b37, REV16(V23.B8(), V25.B8()))
T(0x6e60098a, REV32(V10.H8(), V12.H8()))
T(0x0e2009de, REV64(V30.B8(), V14.B8()))
T(0x4f218e4e, RSHRN2(V14.S4(), V18.D2(), 31))
T(0x6e7460f2, RSUBHN2(V18.H8(), V7.S4(), V20.S4()))
T(0x0e377f74, SABA(V20.B8(), V27.B8(), V23.B8()))
T(0x4ea851f6, SABAL2(V22.D2(), V15.S4(), V8.S4()))
T(0x0e777752, SABD(V18.H4(), V26.H4(), V23.H4()))
T(0x0eba7005, SABDL(V5.D2(), V0.S2(), V26.S2()))
T(0x4e2069c4, SADALP(V4.H8(), V14.B16()))
T(0x4e270017, SADDL2(V23.H8(), V0.B16(), V7.B16()))
T(0x0ea028ca, SADDLP(V10.D1(), V6.S2()))
T(0x4e703b2a, SADDLV(S10, V25.H8()))
T(0x0e6311d2, SADDW(V18.S4(), V14.S4(), V3.H4()))
T(0x1e02c782, SCVTF(S2, W28, 15))
T(0x1e42d0e2, SCVTF(D2, W7, 12))
T(0x9e02e80e, SCVTF(S14, X0, 6))
T(0x9e423dda, SCVTF(D26, X14, 49))
T(0x1e2202f3, SCVTF(S19, W23))
T(0x1e6201e7, SCVTF(D7, W15))
T(0x9e22016c, SCVTF(S12, X11))
T(0x9e620316, SCVTF(D22, X24))
T(0x5f34e509, SCVTF(S9, S8, 12))
T(0x4f5ae716, SCVTF(V22.D2(), V24.D2(), 38))
T(0x5e61d946, SCVTF(D6, D10))
T(0x4e61d86b, SCVTF(V11.D2(), V3.D2()))
// SHA1C
// SHA1H
// SHA1M
// SHA1P
// SHA1SU0
// SHA1SU1
// SHA256H
// SHA256H2
// SHA256SU0
// SHA256SU1
T(0x4eb90506, SHADD(V6.S4(), V8.S4(), V25.S4()))
T(0x5f4d5767, SHL(D7, D27, 13))
T(0x4f1f542f, SHL(V15.H8(), V1.H8(), 15))
T(0x2ea13a71, SHLL(V17.D2(), V19.S2(), 32))
T(0x4f0885fd, SHRN2(V29.B16(), V15.H8(), 8))
T(0x0eb42794, SHSUB(V20.S2(), V28.S2(), V20.S2()))
T(0x7f5f54ad, SLI(D13, D5, 31))
T(0x6f09554e, SLI(V14.B16(), V10.B16(), 1))
T(0x0e316452, SMAX(V18.B8(), V2.B8(), V17.B8()))
T(0x4e66a478, SMAXP(V24.H8(), V3.H8(), V6.H8()))
T(0x0e30a9e6, SMAXV(B6, V15.B8()))
T(0x4e276e2a, SMIN(V10.B16(), V17.B16(), V7.B16()))
T(0x4e29ad73, SMINP(V19.B16(), V11.B16(), V9.B16()))
T(0x0e71aac5, SMINV(H5, V22.H4()))
T(0x4f9f2b00, SMLAL2(V0.D2(), V24.S4(), V31.S()[2]))
T(0x4e788037, SMLAL2(V23.S4(), V1.H8(), V24.H8()))
T(0x4f7362b9, SMLSL2(V25.S4(), V21.H8(), V3.H()[3]))
T(0x0e31a0d5, SMLSL(V21.H8(), V6.B8(), V17.B8()))
T(0x0e162fc3, SMOV(W3, V30.H()[5]))
T(0x4e0a2cf2, SMOV(X18, V7.H()[2]))
T(0x0f6ba85c, SMULL(V28.S4(), V2.H4(), V11.H()[6]))
T(0x4e61c2a1, SMULL2(V1.S4(), V21.H8(), V1.H8()))
T(0x5e20794c, SQABS(B12, B10))
T(0x4e607b9b, SQABS(V27.H8(), V28.H8()))
T(0x5eb50df4, SQADD(S20, S15, S21))
T(0x0e370ff4, SQADD(V20.B8(), V31.B8(), V23.B8()))
T(0x5fab3a4e, SQDMLAL(D14, S18, V11.S()[3]))
T(0x4f5b3805, SQDMLAL2(V5.S4(), V0.H8(), V11.H()[5]))
T(0x5e7f90ed, SQDMLAL(S13, H7, H31))
T(0x0ea992b2, SQDMLAL(V18.D2(), V21.S2(), V9.S2()))
T(0x5f867ba2, SQDMLSL(D2, S29, V6.S()[2]))
T(0x4f997118, SQDMLSL2(V24.D2(), V8.S4(), V25.S()[0]))
T(0x5e62b0b2, SQDMLSL(S18, H5, H2))
T(0x0e74b089, SQDMLSL(V9.S4(), V4.H4(), V20.H4()))
T(0x5f5acb3c, SQDMULH(H28, H25, V10.H()[5]))
T(0x4f7bc13d, SQDMULH(V29.H8(), V9.H8(), V11.H()[3]))
T(0x5e6ab724, SQDMULH(H4, H25, H10))
T(0x4ea6b543, SQDMULH(V3.S4(), V10.S4(), V6.S4()))
T(0x5f89b899, SQDMULL(D25, S4, V9.S()[2]))
T(0x0f53b2ee, SQDMULL(V14.S4(), V23.H4(), V3.H()[1]))
T(0x5e60d01a, SQDMULL(S26, H0, H0))
T(0x0eb4d146, SQDMULL(V6.D2(), V10.S2(), V20.S2()))
T(0x7ee07b81, SQNEG(D1, D28))
T(0x2e607a04, SQNEG(V4.H4(), V16.H4()))
T(0x5f47dac8, SQRDMULH(H8, H22, V7.H()[4]))
T(0x0f45db93, SQRDMULH(V19.H4(), V28.H4(), V5.H()[4]))
T(0x7ea3b621, SQRDMULH(S1, S17, S3))
T(0x6ea2b672, SQRDMULH(V18.S4(), V19.S4(), V2.S4()))
T(0x5e7c5ee7, SQRSHL(H7, H23, H28))
T(0x4e655e4b, SQRSHL(V11.H8(), V18.H8(), V5.H8()))
T(0x5f0c9c10, SQRSHRN(B16, H0, 4))
T(0x4f309e99, SQRSHRN2(V25.S4(), V20.D2(), 16))
T(0x7f1f8de7, SQRSHRUN(H7, S15, 1))
T(0x6f178f67, SQRSHRUN2(V7.H8(), V27.S4(), 9))
T(0x5f7977b8, SQSHL(D24, D29, 57))
T(0x4f1e75f3, SQSHL(V19.H8(), V15.H8(), 14))
T(0x5eb24f5d, SQSHL(S29, S26, S18))
T(0x4e7c4c93, SQSHL(V19.H8(), V4.H8(), V28.H8()))
T(0x7f2e66a1, SQSHLU(S1, S21, 14))
T(0x6f4c65a2, SQSHLU(V2.D2(), V13.D2(), 12))
T(0x5f3f950b, SQSHRN(S11, D8, 1))
T(0x4f329646, SQSHRN2(V6.S4(), V18.D2(), 14))
T(0x7f188469, SQSHRUN(H9, S3, 8))
T(0x6f328478, SQSHRUN2(V24.S4(), V3.D2(), 14))
T(0x5e362dae, SQSUB(B14, B13, B22))
T(0x0e3c2c86, SQSUB(V6.B8(), V4.B8(), V28.B8()))
T(0x5ea149fc, SQXTN(S28, D15))
T(0x4e214b24, SQXTN2(V4.B16(), V25.H8()))
T(0x7e61290e, SQXTUN(H14, S8))
T(0x6ea12b96, SQXTUN2(V22.S4(), V28.D2()))
T(0x4eae1673, SRHADD(V19.S4(), V19.S4(), V14.S4()))
T(0x7f794647, SRI(D7, D18, 7))
T(0x6f654787, SRI(V7.D2(), V28.D2(), 27))
T(0x5ee0549e, SRSHL(D30, D4, D0))
T(0x4eba55d2, SRSHL(V18.S4(), V14.S4(), V26.S4()))
T(0x5f712744, SRSHR(D4, D26, 15))
T(0x4f2025f5, SRSHR(V21.S4(), V15.S4(), 32))
T(0x5f7734a9, SRSRA(D9, D5, 9))
T(0x0f3a371a, SRSRA(V26.S2(), V24.S2(), 6))
T(0x5eed44ee, SSHL(D14, D7, D13))
T(0x0e704683, SSHL(V3.H4(), V20.H4(), V16.H4()))
T(0x4f2aa7c3, SSHLL2(V3.D2(), V30.S4(), 10))
T(0x5f5e058d, SSHR(D13, D12, 34))
T(0x4f730496, SSHR(V22.D2(), V4.D2(), 13))
T(0x5f5e152a, SSRA(D10, D9, 34))
T(0x0f21172b, SSRA(V11.S2(), V25.S2(), 31))
T(0x4e24220f, SSUBL2(V15.H8(), V16.B16(), V4.B16()))
T(0x4e3f32a2, SSUBW2(V2.H8(), V21.H8(), V31.B16()))
T(0x0c007a62, ST1(List{V2.S2()}, X19))
T(0x4c00adb7, ST1(List{V23.D2(), V24.D2()}, X13))
T(0x0c006b92, ST1(List{V18.S2(), V19.S2(), V20.S2()}, X28))
T(0x4c0029b8, ST1(List{V24.S4(), V25.S4(), V26.S4(), V27.S4()}, X13))
T(0x0c9f7f60, ST1(List{V0.D1()}, X27, POST_INDEXED, 8))
T(0x0c9f7ebc, ST1(List{V28.D1()}, X21, POST_INDEXED, 8))
T(0x0c9faf06, ST1(List{V6.D1(), V7.D1()}, X24, POST_INDEXED, 16))
T(0x4c93aff5, ST1(List{V21.D2(), V22.D2()}, SP, POST_INDEXED, X19))
T(0x4c9f6398, ST1(List{V24.B16(), V25.B16(), V26.B16()}, X28, POST_INDEXED, 48))
T(0x4c8162ff, ST1(List{V31.B16(), V0.B16(), V1.B16()}, X23, POST_INDEXED, X1))
T(0x0c9f23ee, ST1(List{V14.B8(), V15.B8(), V16.B8(), V17.B8()}, SP, POST_INDEXED, 32))
T(0x4c862148, ST1(List{V8.B16(), V9.B16(), V10.B16(), V11.B16()}, X10, POST_INDEXED, X6))
T(0x0d001c7a, ST1(List{V26.B()}[7], X3))
T(0x0d005b54, ST1(List{V20.H()}[3], X26))
T(0x4d009392, ST1(List{V18.S()}[3], X28))
T(0x4d008509, ST1(List{V9.D()}[1], X8))
T(0x4d9f1246, ST1(List{V6.B()}[12], X18, POST_INDEXED, 1))
T(0x0d8c17f5, ST1(List{V21.B()}[5], SP, POST_INDEXED, X12))
T(0x4d9f53ee, ST1(List{V14.H()}[6], SP, POST_INDEXED, 2))
T(0x0d8f48c4, ST1(List{V4.H()}[1], X6, POST_INDEXED, X15))
T(0x4d9f8185, ST1(List{V5.S()}[2], X12, POST_INDEXED, 4))
T(0x0d8c92bc, ST1(List{V28.S()}[1], X21, POST_INDEXED, X12))
T(0x4d9f86b3, ST1(List{V19.D()}[1], X21, POST_INDEXED, 8))
T(0x4d9c8442, ST1(List{V2.D()}[1], X2, POST_INDEXED, X28))
T(0x4c008a69, ST2(List{V9.S4(), V10.S4()}, X19))
T(0x4c9f8930, ST2(List{V16.S4(), V17.S4()}, X9, POST_INDEXED, 32))
T(0x0c9a8993, ST2(List{V19.S2(), V20.S2()}, X12, POST_INDEXED, X26))
T(0x0d2001ac, ST2(List{V12.B(), V13.B()}[0], X13))
T(0x4d20495c, ST2(List{V28.H(), V29.H()}[5], X10))
T(0x4d2093e4, ST2(List{V4.S(), V5.S()}[3], SP))
T(0x4d208482, ST2(List{V2.D(), V3.D()}[1], X4))
T(0x4dbf0e40, ST2(List{V0.B(), V1.B()}[11], X18, POST_INDEXED, 2))
T(0x0db8085f, ST2(List{V31.B(), V0.B()}[2], X2, POST_INDEXED, X24))
T(0x0dbf4a2d, ST2(List{V13.H(), V14.H()}[1], X17, POST_INDEXED, 4))
T(0x4db1417e, ST2(List{V30.H(), V31.H()}[4], X11, POST_INDEXED, X17))
T(0x0dbf81af, ST2(List{V15.S(), V16.S()}[0], X13, POST_INDEXED, 8))
T(0x0dbf831c, ST2(List{V28.S(), V29.S()}[0], X24, POST_INDEXED, 8))
T(0x0dbf846a, ST2(List{V10.D(), V11.D()}[0], X3, POST_INDEXED, 16))
T(0x0dab85dc, ST2(List{V28.D(), V29.D()}[0], X14, POST_INDEXED, X11))
T(0x0c004a09, ST3(List{V9.S2(), V10.S2(), V11.S2()}, X16))
T(0x4c9f4768, ST3(List{V8.H8(), V9.H8(), V10.H8()}, X27, POST_INDEXED, 48))
T(0x0c944918, ST3(List{V24.S2(), V25.S2(), V26.S2()}, X8, POST_INDEXED, X20))
T(0x0d003f80, ST3(List{V0.B(), V1.B(), V2.B()}[7], X28))
T(0x0d007306, ST3(List{V6.H(), V7.H(), V8.H()}[2], X24))
T(0x0d00b131, ST3(List{V17.S(), V18.S(), V19.S()}[1], X9))
T(0x4d00a5f8, ST3(List{V24.D(), V25.D(), V26.D()}[1], X15))
T(0x0d9f27c1, ST3(List{V1.B(), V2.B(), V3.B()}[1], X30, POST_INDEXED, 3))
T(0x4d992bb2, ST3(List{V18.B(), V19.B(), V20.B()}[10], X29, POST_INDEXED, X25))
T(0x0d9f785d, ST3(List{V29.H(), V30.H(), V31.H()}[3], X2, POST_INDEXED, 6))
T(0x4d8b726b, ST3(List{V11.H(), V12.H(), V13.H()}[6], X19, POST_INDEXED, X11))
T(0x4d9fa342, ST3(List{V2.S(), V3.S(), V4.S()}[2], X26, POST_INDEXED, 12))
T(0x4d80b206, ST3(List{V6.S(), V7.S(), V8.S()}[3], X16, POST_INDEXED, X0))
T(0x4d9fa5de, ST3(List{V30.D(), V31.D(), V0.D()}[1], X14, POST_INDEXED, 24))
T(0x4d8ba6d7, ST3(List{V23.D(), V24.D(), V25.D()}[1], X22, POST_INDEXED, X11))
T(0x0c00034f, ST4(List{V15.B8(), V16.B8(), V17.B8(), V18.B8()}, X26))
T(0x4c9f038c, ST4(List{V12.B16(), V13.B16(), V14.B16(), V15.B16()}, X28, POST_INDEXED, 64))
T(0x4c800719, ST4(List{V25.H8(), V26.H8(), V27.H8(), V28.H8()}, X24, POST_INDEXED, X0))
T(0x0d2021a8, ST4(List{V8.B(), V9.B(), V10.B(), V11.B()}[0], X13))
T(0x4d2062cd, ST4(List{V13.H(), V14.H(), V15.H(), V16.H()}[4], X22))
T(0x0d20b146, ST4(List{V6.S(), V7.S(), V8.S(), V9.S()}[1], X10))
T(0x4d20a6f5, ST4(List{V21.D(), V22.D(), V23.D(), V24.D()}[1], X23))
T(0x0dbf2d56, ST4(List{V22.B(), V23.B(), V24.B(), V25.B()}[3], X10, POST_INDEXED, 4))
T(0x4da631df, ST4(List{V31.B(), V0.B(), V1.B(), V2.B()}[12], X14, POST_INDEXED, X6))
T(0x0dbf7a76, ST4(List{V22.H(), V23.H(), V24.H(), V25.H()}[3], X19, POST_INDEXED, 8))
T(0x0dbb698e, ST4(List{V14.H(), V15.H(), V16.H(), V17.H()}[1], X12, POST_INDEXED, X27))
T(0x4dbfb37f, ST4(List{V31.S(), V0.S(), V1.S(), V2.S()}[3], X27, POST_INDEXED, 16))
T(0x4dadb3d1, ST4(List{V17.S(), V18.S(), V19.S(), V20.S()}[3], X30, POST_INDEXED, X13))
T(0x4dbfa5b3, ST4(List{V19.D(), V20.D(), V21.D(), V22.D()}[1], X13, POST_INDEXED, 32))
T(0x4db5a7cf, ST4(List{V15.D(), V16.D(), V17.D(), V18.D()}[1], X30, POST_INDEXED, X21))
T(0x2c29149a, STNP(S26, S5, X4, -184))
T(0x6c229316, STNP(D22, D4, X24, -472))
T(0xac3bc3c8, STNP(Q8, Q16, X30, -144))
T(0x2cacdf66, STP(S6, S23, X27, POST_INDEXED, -156))
T(0x6c826f4f, STP(D15, D27, X26, POST_INDEXED, 32))
T(0xac97955a, STP(Q26, Q5, X10, POST_INDEXED, 752))
T(0x2da7ba37, STP(S23, S14, X17, PRE_INDEXED, -196))
T(0x6d8bcbce, STP(D14, D18, X30, PRE_INDEXED, 184))
T(0xad8b4ba6, STP(Q6, Q18, X29, PRE_INDEXED, 352))
T(0x2d1f7434, STP(S20, S29, X1, 248))
T(0x6d3bb5d8, STP(D24, D13, X14, -72))
T(0xad09088a, STP(Q10, Q2, X4, 288))
T(0x3c066467, STR(B7, X3, POST_INDEXED, 102))
T(0x7c070723, STR(H3, X25, POST_INDEXED, 112))
T(0xbc13175a, STR(S26, X26, POST_INDEXED, -207))
T(0xfc1be536, STR(D22, X9, POST_INDEXED, -66))
T(0x3c99b56b, STR(Q11, X11, POST_INDEXED, -101))
T(0x3c002d49, STR(B9, X10, PRE_INDEXED, 2))
T(0x7c158e09, STR(H9, X16, PRE_INDEXED, -168))
T(0xbc06bc8d, STR(S13, X4, PRE_INDEXED, 107))
T(0xfc080eae, STR(D14, X21, PRE_INDEXED, 128))
T(0x3c8e7ed9, STR(Q25, X22, PRE_INDEXED, 231))
T(0x3d275492, STR(B18, X4, 2517))
T(0x7d0b4265, STR(H5, X19, 1440))
T(0xbd0d2595, STR(S21, X12, 3364))
T(0xfd237a73, STR(D19, X19, 18160))
T(0x3db4a5f5, STR(Q21, X15, 53904))
T(0x3c3e693c, STR(B28, X9, X30, LSL, 0))
T(0x3c3b6ac5, STR(B5, X22, X27, LSL, 0))
T(0x7c36faf0, STR(H16, X23, X22, SXTX, 1))
T(0xbc27f838, STR(S24, X1, X7, SXTX, 2))
T(0xfc29db51, STR(D17, X26, W9, SXTW, 3))
T(0x3cbfea8f, STR(Q15, X20, XZR, SXTX))
T(0x3c0441c8, STUR(B8, X14, 68))
T(0x7c00b0d7, STUR(H23, X6, 11))
T(0xbc0d117d, STUR(S29, X11, 209))
T(0xfc1f03c0, STUR(D0, X30, -16))
T(0x3c9753f0, STUR(Q16, SP, -139))
T(0x7eeb84f9, SUB(D25, D7, D11))
T(0x6e708714, SUB(V20.H8(), V24.H8(), V16.H8()))
T(0x4e766323, SUBHN2(V3.H8(), V25.S4(), V22.S4()))
T(0x5e203935, SUQADD(B21, B9))
T(0x4e203b33, SUQADD(V19.B16(), V25.B16()))
// SXTL
T(0x0e0c20db, TBL(V27.B8(), List{V6.B16(), V7.B16()}, V12.B8()))
T(0x4e1d43ab, TBL(V11.B16(), List{V29.B16(), V30.B16(), V31.B16()}, V29.B16()))
T(0x0e07634f, TBL(V15.B8(), List{V26.B16(), V27.B16(), V28.B16(), V29.B16()}, V7.B8()))
T(0x0e0603b9, TBL(V25.B8(), List{V29.B16()}, V6.B8()))
T(0x0e05317a, TBX(V26.B8(), List{V11.B16(), V12.B16()}, V5.B8()))
T(0x4e0150ca, TBX(V10.B16(), List{V6.B16(), V7.B16(), V8.B16()}, V1.B16()))
T(0x4e0e7190, TBX(V16.B16(), List{V12.B16(), V13.B16(), V14.B16(), V15.B16()}, V14.B16()))
T(0x4e1b1333, TBX(V19.B16(), List{V25.B16()}, V27.B16()))
T(0x4e0829e3, TRN1(V3.B16(), V15.B16(), V8.B16()))
T(0x4ecc6b24, TRN2(V4.D2(), V25.D2(), V12.D2()))
T(0x2e697f5d, UABA(V29.H4(), V26.H4(), V9.H4()))
T(0x2e36519e, UABAL(V30.H8(), V12.B8(), V22.B8()))
T(0x6e6975e0, UABD(V0.H8(), V15.H8(), V9.H8()))
T(0x2e2e718a, UABDL(V10.H8(), V12.B8(), V14.B8()))
T(0x6ea069b1, UADALP(V17.D2(), V13.S4()))
T(0x2e6d0349, UADDL(V9.S4(), V26.H4(), V13.H4()))
T(0x6e602bfc, UADDLP(V28.S4(), V31.H8()))
T(0x6e703b6d, UADDLV(S13, V27.H8()))
T(0x2e781352, UADDW(V18.S4(), V26.S4(), V24.H4()))
T(0x1e03ec95, UCVTF(S21, W4, 5))
T(0x1e43fd36, UCVTF(D22, W9, 1))
T(0x9e03a27b, UCVTF(S27, X19, 24))
T(0x9e43e9c4, UCVTF(D4, X14, 6))
T(0x1e230096, UCVTF(S22, W4))
T(0x1e630076, UCVTF(D22, W3))
T(0x9e2302c8, UCVTF(S8, X22))
T(0x9e6302cd, UCVTF(D13, X22))
T(0x7f2ce5a2, UCVTF(S2, S13, 20))
T(0x6f4be788, UCVTF(V8.D2(), V28.D2(), 53))
T(0x7e21d87f, UCVTF(S31, S3))
T(0x2e21da7d, UCVTF(V29.S2(), V19.S2()))
T(0x2e7b0674, UHADD(V20.H4(), V19.H4(), V27.H4()))
T(0x6ea9277f, UHSUB(V31.S4(), V27.S4(), V9.S4()))
T(0x6e7a6658, UMAX(V24.H8(), V18.H8(), V26.H8()))
T(0x2e23a513, UMAXP(V19.B8(), V8.B8(), V3.B8()))
T(0x2e70a9b5, UMAXV(H21, V13.H4()))
T(0x6e7d6ef2, UMIN(V18.H8(), V23.H8(), V29.H8()))
T(0x2e6eae4e, UMINP(V14.H4(), V18.H4(), V14.H4()))
T(0x2e71abe6, UMINV(H6, V31.H4()))
T(0x6fb820fa, UMLAL2(V26.D2(), V7.S4(), V24.S()[1]))
T(0x6ebc83ab, UMLAL2(V11.D2(), V29.S4(), V28.S4()))
T(0x2f5c61cf, UMLSL(V15.S4(), V14.H4(), V12.H()[1]))
T(0x6e6aa2e2, UMLSL2(V2.S4(), V23.H8(), V10.H8()))
T(0x0e0f3fb8, UMOV(W24, V29.B()[7]))
// UMOV
T(0x6f62a05c, UMULL2(V28.S4(), V2.H8(), V2.H()[2]))
T(0x6e6cc3b0, UMULL2(V16.S4(), V29.H8(), V12.H8()))
T(0x7ea40f68, UQADD(S8, S27, S4))
T(0x6eac0e8f, UQADD(V15.S4(), V20.S4(), V12.S4()))
T(0x7e2a5df5, UQRSHL(B21, B15, B10))
T(0x6ef55fc9, UQRSHL(V9.D2(), V30.D2(), V21.D2()))
T(0x7f0b9db4, UQRSHRN(B20, H13, 5))
T(0x2f159d7d, UQRSHRN(V29.H4(), V11.S4(), 11))
T(0x7f6c755c, UQSHL(D28, D10, 44))
T(0x6f6175ec, UQSHL(V12.D2(), V15.D2(), 33))
T(0x7eef4ff4, UQSHL(D20, D31, D15))
T(0x6e3d4f2e, UQSHL(V14.B16(), V25.B16(), V29.B16()))
T(0x7f1f94d2, UQSHRN(H18, S6, 1))
T(0x6f3397e4, UQSHRN2(V4.S4(), V31.D2(), 13))
T(0x7ee12cad, UQSUB(D13, D5, D1))
T(0x2e712ff3, UQSUB(V19.H4(), V31.H4(), V17.H4()))
T(0x7e614b06, UQXTN(H6, S24))
T(0x6e2149ec, UQXTN2(V12.B16(), V15.H8()))
T(0x0ea1c849, URECPE(V9.S2(), V2.S2()))
T(0x6eb51740, URHADD(V0.S4(), V26.S4(), V21.S4()))
T(0x7eeb57f8, URSHL(D24, D31, D11))
T(0x6e335531, URSHL(V17.B16(), V9.B16(), V19.B16()))
T(0x7f65253d, URSHR(D29, D9, 27))
T(0x2f102566, URSHR(V6.H4(), V11.H4(), 16))
T(0x2ea1cb59, URSQRTE(V25.S2(), V26.S2()))
T(0x7f54345f, URSRA(D31, D2, 44))
T(0x2f1b345f, URSRA(V31.H4(), V2.H4(), 5))
T(0x7ef94448, USHL(D8, D2, D25))
T(0x6ea14621, USHL(V1.S4(), V17.S4(), V1.S4()))
T(0x2f33a5a1, USHLL(V1.D2(), V13.S2(), 19))
T(0x7f5405d0, USHR(D16, D14, 44))
T(0x6f450505, USHR(V5.D2(), V8.D2(), 59))
T(0x7ea038c1, USQADD(S1, S6))
T(0x2e203b60, USQADD(V0.B8(), V27.B8()))
T(0x7f4616d2, USRA(D18, D22, 58))
T(0x2f1a1713, USRA(V19.H4(), V24.H4(), 6))
T(0x2e3f226e, USUBL(V14.H8(), V19.B8(), V31.B8()))
T(0x6e7a33a0, USUBW2(V0.S4(), V29.S4(), V26.H8()))
// UXTL
T(0x4e1b1a1f, UZP1(V31.B16(), V16.B16(), V27.B16()))
T(0x4ecc597b, UZP2(V27.D2(), V11.D2(), V12.D2()))
T(0x0e212af7, XTN(V23.B8(), V23.H8()))
T(0x4e853928, ZIP1(V8.S4(), V9.S4(), V5.S4()))
T(0x0e977a78, ZIP2(V24.S2(), V19.S2(), V23.S2()))

1079
externals/oaknut/tests/general.cpp vendored Normal file

File diff suppressed because it is too large Load diff

20
externals/oaknut/tests/rand_int.hpp vendored Normal file
View file

@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <random>
#include <type_traits>
// Returns a uniformly distributed random integer in the closed range [min, max].
//
// T must be an integer type that std::uniform_int_distribution accepts. The
// standard leaves the distribution undefined for bool and for the narrow
// character types, so those are rejected at compile time. Note that plain
// `char` is a distinct type from both `signed char` and `unsigned char` and
// therefore has to be checked for explicitly.
//
// Precondition: min <= max (required by std::uniform_int_distribution).
//
// The engine is a function-local static seeded once from std::random_device,
// so all calls for a given T draw from the same sequence. No locking is done
// here; callers that share it across threads must synchronise themselves.
template<typename T>
T RandInt(T min, T max)
{
    static_assert(std::is_integral_v<T>, "T must be an integral type.");
    static_assert(!std::is_same_v<T, char> && !std::is_same_v<T, signed char> && !std::is_same_v<T, unsigned char>,
                  "Using char with uniform_int_distribution is undefined behavior.");
    static_assert(!std::is_same_v<T, bool>,
                  "Using bool with uniform_int_distribution is undefined behavior.");

    static std::random_device rd;
    static std::mt19937 mt(rd());

    // The distribution is cheap to build, so it is constructed per call with the requested bounds.
    std::uniform_int_distribution<T> rand(min, max);
    return rand(mt);
}