From 99c0a73f91e7a5e66db686f29e158e99193a043d Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 28 Jan 2024 14:56:59 +0000 Subject: [PATCH] Squashed 'externals/oaknut/' changes from c24f918e5..d0488d932 d0488d932 oaknut: 2.0.0 40ad78bbf oaknut: Implement DualCodeBlock and related support 9f131cfb5 oaknut: add configuration for standalone installation 69799b43c oaknut: Test building for Android on CI 1d51f5512 oaknut: 1.2.2 918bd94f0 oaknut: Eliminate -Wconversion warnings 316d8869e oaknut: Fix edgecases in MOVP2R on +/-4GiB boundary d8634eaa1 oaknut: Fix page boundary error in ADP d0ca9a24e oaknut: Update README examples for CPU feature detection dbeec268b oaknut: feature_detection_freebsd: Warn about incompatibility with earlier FreeBSD versions 86e5386e2 oaknut: feature_detect: Support NetBSD df4cf2d48 oaknut: feature_detect: Support OpenBSD 99dfff25a oaknut: feature_detection: Read ID registers 319b3d2c9 oaknut: Add basic CPU feature detection 23e9ddb4c oaknut: CI: Don't run slow tests on OpenBSD 734f1bdb4 oaknut: CI: Use up-to-date qemu f462c9774 oaknut: CI: Build on OpenBSD 19cd42204 oaknut: code_block: Add NetBSD and OpenBSD support 18b86a3ec oaknut: SystemReg: Add more EL0 accessible registers 53c43bf0c oaknut/tests: Reduce iterations for MOVP2R cc37df19e oaknut: Test on FreeBSD a66b32d26 oaknut: Fix crossing sign boundary in PageOffset 206468d72 oaknut: CI: Add macos-arm64 build e6eecc3f9 oaknut: 1.2.1 4252d8f4a oaknut: CMakeLists: Warnings are errors on MSVC 408eed65f oaknut: arm64_encode_helpers: remove unreachable code bfc8eedfb oaknut: arm64_encode_helpers: p maybe unused ff4456eca oaknut: Avoid negation of unsigned values b4ac8fd6c oaknut: Fix MOV for applications of MOVN 0575cadc4 oaknut: Disable certain functionality where absolute addressing is not available 394a3c8f0 oaknut: Appease MSVC 011183670 oaknut: 1.2.0 e83c9f327 oaknut: Add VectorCodeGenerator 5eb122cc5 oaknut: Tidy up public header 45c5a7b25 oaknut: Fix clang-format errors 36243256f oaknut: 
Add `const` qualifier to `AddrOffset` ctor 4af500cb5 oaknut: Add `ptr` accessor to `Label` bccb06669 oaknut: CodeGenerator const correctness da0590a86 oaknut: github: Update package repositories git-subtree-dir: externals/oaknut git-subtree-split: d0488d9320ae673167dd9117223e3453d5ff102f --- .clang-format | 3 - .github/workflows/build-and-test.yml | 140 +++++++- CMakeLists.txt | 48 ++- README.md | 156 ++++++++- include/oaknut/code_block.hpp | 20 +- include/oaknut/dual_code_block.hpp | 165 +++++++++ .../oaknut/feature_detection/cpu_feature.hpp | 107 ++++++ .../feature_detection/feature_detection.hpp | 35 ++ .../feature_detection_apple.hpp | 112 ++++++ .../feature_detection_freebsd.hpp | 62 ++++ .../feature_detection_generic.hpp | 23 ++ .../feature_detection_hwcaps.hpp | 120 +++++++ .../feature_detection_idregs.hpp | 167 +++++++++ .../feature_detection_linux.hpp | 45 +++ .../feature_detection_netbsd.hpp | 81 +++++ .../feature_detection_openbsd.hpp | 63 ++++ .../feature_detection_w32.hpp | 99 ++++++ .../oaknut/feature_detection/id_registers.hpp | 318 ++++++++++++++++++ .../read_id_registers_directly.hpp | 52 +++ .../oaknut/impl/arm64_encode_helpers.inc.hpp | 57 +++- include/oaknut/impl/cpu_feature.inc.hpp | 78 +++++ include/oaknut/impl/enum.hpp | 62 +++- include/oaknut/impl/imm.hpp | 16 +- .../impl/mnemonics_generic_v8.0.inc.hpp | 12 +- .../impl/mnemonics_generic_v8.2.inc.hpp | 4 +- include/oaknut/impl/oaknut_exception.inc.hpp | 1 + include/oaknut/impl/offset.hpp | 12 +- include/oaknut/impl/overloaded.hpp | 16 + include/oaknut/impl/reg.hpp | 12 +- include/oaknut/impl/string_literal.hpp | 18 + include/oaknut/oaknut.hpp | 218 ++++++------ oaknutConfig.cmake.in | 5 + tests/_feature_detect.cpp | 71 ++++ tests/basic.cpp | 173 ++++++++-- tests/fpsimd.cpp | 24 +- tests/general.cpp | 24 +- tests/vector_code_gen.cpp | 83 +++++ 37 files changed, 2481 insertions(+), 221 deletions(-) create mode 100644 include/oaknut/dual_code_block.hpp create mode 100644 
include/oaknut/feature_detection/cpu_feature.hpp create mode 100644 include/oaknut/feature_detection/feature_detection.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_apple.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_freebsd.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_generic.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_hwcaps.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_idregs.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_linux.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_netbsd.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_openbsd.hpp create mode 100644 include/oaknut/feature_detection/feature_detection_w32.hpp create mode 100644 include/oaknut/feature_detection/id_registers.hpp create mode 100644 include/oaknut/feature_detection/read_id_registers_directly.hpp create mode 100644 include/oaknut/impl/cpu_feature.inc.hpp create mode 100644 include/oaknut/impl/overloaded.hpp create mode 100644 oaknutConfig.cmake.in create mode 100644 tests/_feature_detect.cpp create mode 100644 tests/vector_code_gen.cpp diff --git a/.clang-format b/.clang-format index 28884fb5..2e462553 100644 --- a/.clang-format +++ b/.clang-format @@ -2,7 +2,6 @@ Language: Cpp AccessModifierOffset: -4 AlignAfterOpenBracket: Align -AlignConsecutiveMacros: None AlignConsecutiveAssignments: None AlignConsecutiveBitFields: None AlignConsecutiveDeclarations: None @@ -175,7 +174,6 @@ SortUsingDeclarations: true SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: false -SpaceAroundPointerQualifiers: Default SpaceBeforeAssignmentOperators: true SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false @@ -189,7 +187,6 @@ SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 2 SpacesInAngles: false 
-SpacesInConditionalStatement: false SpacesInCStyleCastParentheses: false SpacesInConditionalStatement: false SpacesInContainerLiterals: false diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b7c396d7..2918c83e 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -9,6 +9,9 @@ jobs: - name: Checkout oaknut repo uses: actions/checkout@v3 + - name: Update package repositories + run: sudo apt-get update + - name: Install dependencies run: > sudo apt-get install -q -y @@ -17,6 +20,22 @@ jobs: ninja-build qemu-user + - name: Checkout qemu + uses: actions/checkout@v3 + with: + repository: qemu/qemu + ref: v8.1.2 + path: externals/qemu + + - name: Build qemu + working-directory: externals/qemu + run: | + sudo apt-get install git libglib2.0-dev libfdt-dev libpixman-1-dev zlib1g-dev ninja-build + mkdir build + cd build + ../configure --target-list=aarch64-linux-user + make -j4 qemu-aarch64 + - name: Checkout Catch2 v3 repo uses: actions/checkout@v3 with: @@ -41,8 +60,8 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: qemu-aarch64 -L /usr/aarch64-linux-gnu ./oaknut-tests -d yes - + run: ../externals/qemu/build/qemu-aarch64 -L /usr/aarch64-linux-gnu ./oaknut-tests -d yes + test_on_windows: runs-on: windows-latest name: msvc-arm64 @@ -57,7 +76,7 @@ jobs: repository: catchorg/Catch2 ref: v3.2.0 path: externals/catch - + - name: Setup msvc-arm64 environment uses: ilammy/msvc-dev-cmd@v1 with: @@ -73,3 +92,118 @@ jobs: - name: Build working-directory: ${{github.workspace}}/build run: cmake --build . 
--config Release + + test_on_macos: + runs-on: macos-latest + name: macos-arm64 + + steps: + - name: Checkout oaknut repo + uses: actions/checkout@v3 + + - name: Checkout Catch2 v3 repo + uses: actions/checkout@v3 + with: + repository: catchorg/Catch2 + ref: v3.2.0 + path: externals/catch + + - name: Install dependencies + run: | + brew install ninja + + - name: Configure CMake + run: > + cmake + -B ${{github.workspace}}/build + -GNinja + -DCMAKE_OSX_ARCHITECTURES=arm64 + -DOAKNUT_USE_BUNDLED_CATCH=ON + + - name: Build + working-directory: ${{github.workspace}}/build + run: cmake --build . --config Release + + test_on_freebsd: + runs-on: ubuntu-latest + name: freebsd-arm64 + + steps: + - name: Checkout oaknut repo + uses: actions/checkout@v3 + + - name: Build and Test + uses: cross-platform-actions/action@v0.19.1 + with: + operating_system: freebsd + architecture: arm64 + version: '13.2' + shell: bash + run: | + pwd + sudo pkg update + sudo pkg install -y catch2 cmake ninja + cmake -B ${{github.workspace}}/build -GNinja + cd build + cmake --build . --config Release + ./oaknut-tests -d yes + + test_on_openbsd: + runs-on: ubuntu-latest + name: openbsd-arm64 + + steps: + - name: Checkout oaknut repo + uses: actions/checkout@v3 + + - name: Build and Test + uses: cross-platform-actions/action@v0.19.1 + with: + operating_system: openbsd + architecture: arm64 + version: '7.3' + shell: bash + run: | + pwd + sudo pkg_add catch2 cmake ninja + cmake -B ${{github.workspace}}/build -GNinja + cd build + cmake --build . 
--config Release + ./oaknut-tests -d yes "~[slow]" + + test_on_android: + runs-on: ubuntu-latest + name: android + + steps: + - name: Checkout oaknut repo + uses: actions/checkout@v3 + + - name: Update package repositories + run: sudo apt-get update + + - name: Install dependencies + run: sudo apt-get install -q -y ninja-build + + - name: Checkout Catch2 v3 repo + uses: actions/checkout@v3 + with: + repository: catchorg/Catch2 + ref: v3.2.0 + path: externals/catch + + - name: Configure CMake + run: > + cmake + -B ${{github.workspace}}/build + -H. + -GNinja + -DANDROID_ABI=arm64-v8a + -DANDROID_PLATFORM=30 + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake + -DOAKNUT_USE_BUNDLED_CATCH=ON + + - name: Build + working-directory: ${{github.workspace}}/build + run: cmake --build . --config Release diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c825a2a..a0278d64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,11 @@ cmake_minimum_required(VERSION 3.8) -project(oaknut LANGUAGES CXX VERSION 1.1.6) +project(oaknut LANGUAGES CXX VERSION 2.0.0) # Determine if we're built as a subproject (using add_subdirectory) # or if this is the master project. 
set(MASTER_PROJECT OFF) if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) - set(MASTER_PROJECT ON) + set(MASTER_PROJECT ON) endif() # Disable in-source builds @@ -17,26 +17,41 @@ endif() # Source project files set(header_files + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/dual_code_block.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/cpu_feature.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/feature_detection.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/id_registers.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/cpu_feature.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.1.inc.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/multi_typed_name.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/offset.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/overloaded.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/string_literal.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut_exception.hpp ) +include(GNUInstallDirs) + # Library definition add_library(oaknut INTERFACE) add_library(merry::oaknut ALIAS oaknut) target_sources(oaknut INTERFACE "$") 
-target_include_directories(oaknut INTERFACE $) +target_include_directories(oaknut INTERFACE + $ + $ +) target_compile_features(oaknut INTERFACE cxx_std_20) # Tests @@ -49,9 +64,12 @@ if (MASTER_PROJECT) endif() add_executable(oaknut-tests + tests/_feature_detect.cpp tests/basic.cpp tests/fpsimd.cpp tests/general.cpp + tests/rand_int.hpp + tests/vector_code_gen.cpp ) target_include_directories(oaknut-tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/tests) target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut) @@ -61,6 +79,7 @@ if (MASTER_PROJECT) /external:W0 /external:anglebrackets /W4 + /WX /w44263 # Non-virtual member function hides base class virtual function /w44265 # Class has virtual functions, but destructor is not virtual /w44456 # Declaration of 'var' hides previous local declaration @@ -78,19 +97,34 @@ if (MASTER_PROJECT) /Zc:inline # Omits inline functions from object-file output. /Zc:throwingNew # Assumes new (without std::nothrow) never returns null. /volatile:iso # Use strict standard-abiding volatile semantics - /bigobj # Increase number of sections in .obj files - /DNOMINMAX ) else() target_compile_options(oaknut-tests PRIVATE -Wall -Wextra -Wcast-qual -pedantic -pedantic-errors -Wfatal-errors -Wno-missing-braces) endif() endif() -# Export -include(GNUInstallDirs) +# Install +include(CMakePackageConfigHelpers) install(TARGETS oaknut EXPORT oaknutTargets) install(EXPORT oaknutTargets NAMESPACE merry:: DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/oaknut" ) + +configure_package_config_file("${CMAKE_CURRENT_SOURCE_DIR}/oaknutConfig.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/oaknutConfig.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/oaknut" +) +write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/oaknutConfigVersion.cmake" + COMPATIBILITY SameMajorVersion +) +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/oaknutConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/oaknutConfigVersion.cmake" + DESTINATION 
"${CMAKE_INSTALL_LIBDIR}/cmake/oaknut" +) +install(DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" +) diff --git a/README.md b/README.md index 9f5c688c..8e32760b 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,13 @@ Oaknut is a header-only library that allows one to dynamically assemble code in- ## Usage -Provide `oaknut::CodeGenerator` with a pointer to a block of memory. Call functions on it to emit code. +Give `oaknut::CodeGenerator` a pointer to a block of memory. Call functions on it to emit code. Simple example: ```cpp #include +#include #include using EmittedFunction = int (*)(); @@ -20,7 +21,7 @@ EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value) { using namespace oaknut::util; - EmittedFunction result = code.ptr(); + EmittedFunction result = code.xptr(); code.MOV(W0, value); code.RET(); @@ -31,7 +32,7 @@ EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value) int main() { oaknut::CodeBlock mem{4096}; - oaknut::CodeGenerator code{mem.ptr()}; + oaknut::CodeGenerator code{mem.ptr(), mem.ptr()}; mem.unprotect(); @@ -46,6 +47,78 @@ int main() } ``` +CodeGenerator takes two pointers. The first pointer is the memory address to write to, and the second pointer is the memory address that the code will be executing from. This allows you to write to a buffer before copying to the final destination for execution, or to use dual-mapped memory blocks to avoid memory protection overhead. 
+ +Below is an example of using the oaknut-provided utility header for dual-mapped memory blocks: + +```cpp +#include +#include +#include + +using EmittedFunction = ; + +int main() +{ + using namespace oaknut::util; + + oaknut::DualCodeBlock mem{4096}; + oaknut::CodeGenerator code{mem.wptr(), mem.xptr()}; + + const auto result = code.xptr(); + + code.MOV(W0, value); + code.RET(); + + mem.invalidate_all(); + + std::printf("%i\n", fn()); // Output: 42 + + return 0; +} +``` + +### Emit to `std::vector` + +If you wish to merely emit code into memory without executing it, or if you are developing a cross-compiler that is not running on an ARM64 device, you can use `oaknut::VectorCodeGenerator` instead. + +Provide `oaknut::VectorCodeGenerator` with a reference to a `std::vector` and it will append to that vector. + +The second pointer argument represents the destination address the code will eventually be executed from. + +Simple example: + +```cpp +#include +#include +#include +#include + +int main() +{ + std::vector vec; + oaknut::VectorCodeGenerator code{vec, (uint32_t*)0x1000}; + + code.MOV(W0, 42); + code.RET(); + + std::printf("%08x %08x\n", vec[0], vec[1]); // Output: d2800540 d65f03c0 + + return 0; +} +``` + +## Headers + +| Header | Compiles on non-ARM64 | Contents | +| ------ | --------------------- | -------- | +| `` | Yes | Provides `CodeGenerator` and `VectorCodeGenerator` for code emission, as well as the `oaknut::util` namespace. | +| `` | No | Utility header that provides `CodeBlock`, allocates, alters permissions of, and invalidates executable memory. | +| `` | No | Utility header that provides `DualCodeBlock`, which allocates two mirrored memory blocks (with RW and RX permissions respectively). | +| `` | Yes | Provides `OaknutException` which is thrown on an error. | +| `` | Yes | Utility header that provides `CpuFeatures` which can be used to describe AArch64 features. 
| +| `` | No | Utility header that provides `detect_features` and `read_id_registers` for determining available AArch64 features. | + ### Instructions Each AArch64 instruction corresponds to one emitter function. For a list of emitter functions see: @@ -108,6 +181,83 @@ List{V0.B(), V1.B(), V2.B()}[1] // This expression has type List`, then call `detect_features` to get a bitset of features in a cross-platform manner. + +CPU feature detection is operating-system specific, and some operating systems even have multiple methods. Here is a list of supported operating systems and implemented methods: + +| Operating system | Default Method | +| ---- | ---- | +| Linux / Android | [ELF hwcaps](https://www.kernel.org/doc/html/latest/arch/arm64/elf_hwcaps.html) | +| Apple | [sysctlbyname](https://developer.apple.com/documentation/kernel/1387446-sysctlbyname) | +| Windows | [IsProcessorFeaturePresent](https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent) | +| FreeBSD | ELF hwcaps | +| NetBSD | machdep.cpu%d.cpu_id sysctl | +| OpenBSD | CTL_MACHDEP.CPU_ID_* sysctl | + +Advanced users who want a particular detection method instead of the default can call one of the alternatives directly. (See `detect_features_via_*`.) + +Simple example: + +```cpp +#include +#include + +int main() { + oaknut::CpuFeatures feats = oaknut::detect_features(); + + std::printf("CPU supports JSCVT: %i\n", feats.has(oaknut::CpuFeature::JSCVT)); +} +``` + +### ID registers + +We also provide a cross-platform way for ID registers to be read: + +| **`OAKNUT_SUPPORTS_READING_ID_REGISTERS`** | Available functionality | +| ---- | ---- | +| 0 | Reading ID registers is not supported on this operating system. | +| 1 | This operating system provides a system-wide set of ID registers; use `read_id_registers()`. | +| 2 | Per-core ID registers, use `get_core_count()` and `read_id_registers(int index)`. 
| +All of the above operating systems, with the exception of Apple platforms, also support reading ID registers, and if you prefer you can do feature detection via `detect_features_via_id_registers(*read_id_registers())`. + +Simple example: + +```cpp +#include +#include +#include + +int main() { +#if OAKNUT_SUPPORTS_READING_ID_REGISTERS == 1 + + oaknut::id::IdRegisters id = oaknut::read_id_registers(); + + std::printf("ISAR0 register: %08x\n", id.isar0.value); + +#elif OAKNUT_SUPPORTS_READING_ID_REGISTERS == 2 + + oaknut::id::IdRegisters id = oaknut::read_id_registers(0); + + const std::size_t core_count = oaknut::get_core_count(); + for (std::size_t core_index = 0; core_index < core_count; core_index++) { + std::printf("ISAR0 register (for core %zu): %08x\n", core_index, id.isar0.value); + } + +#else + + std::printf("Reading ID registers not supported\n"); + +#endif +} +``` + ## License This project is [MIT licensed](LICENSE). diff --git a/include/oaknut/code_block.hpp b/include/oaknut/code_block.hpp index 1c29ad09..bfa87d96 100644 --- a/include/oaknut/code_block.hpp +++ b/include/oaknut/code_block.hpp @@ -36,6 +36,10 @@ public: # else m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0); # endif +#elif defined(__NetBSD__) + m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC), MAP_ANON | MAP_PRIVATE, -1, 0); +#elif defined(__OpenBSD__) + m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0); #else m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0); #endif @@ -68,23 +72,19 @@ public: void protect() { -#if defined(__APPLE__) -# if TARGET_OS_IPHONE - mprotect(m_memory, m_size, PROT_READ | PROT_EXEC); -# else +#if defined(__APPLE__) && !TARGET_OS_IPHONE pthread_jit_write_protect_np(1); -# endif +#elif defined(__APPLE__) || defined(__NetBSD__) || 
defined(__OpenBSD__) + mprotect(m_memory, m_size, PROT_READ | PROT_EXEC); #endif } void unprotect() { -#if defined(__APPLE__) -# if TARGET_OS_IPHONE - mprotect(m_memory, m_size, PROT_READ | PROT_WRITE); -# else +#if defined(__APPLE__) && !TARGET_OS_IPHONE pthread_jit_write_protect_np(0); -# endif +#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__) + mprotect(m_memory, m_size, PROT_READ | PROT_WRITE); #endif } diff --git a/include/oaknut/dual_code_block.hpp b/include/oaknut/dual_code_block.hpp new file mode 100644 index 00000000..eb6e19d9 --- /dev/null +++ b/include/oaknut/dual_code_block.hpp @@ -0,0 +1,165 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include + +#if defined(_WIN32) +# define NOMINMAX +# include +#elif defined(__APPLE__) +# include +# include + +# include +# include +# include +# include +# include +#else +# if !defined(_GNU_SOURCE) +# define _GNU_SOURCE +# endif +# include +# include +# include +#endif + +namespace oaknut { + +class DualCodeBlock { +public: + explicit DualCodeBlock(std::size_t size) + : m_size(size) + { +#if defined(_WIN32) + m_wmem = m_xmem = (std::uint32_t*)VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); + if (m_wmem == nullptr) + throw std::bad_alloc{}; +#elif defined(__APPLE__) + m_wmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + if (m_wmem == MAP_FAILED) + throw std::bad_alloc{}; + + vm_prot_t cur_prot, max_prot; + kern_return_t ret = vm_remap(mach_task_self(), (vm_address_t*)&m_xmem, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, mach_task_self(), (mach_vm_address_t)m_wmem, false, &cur_prot, &max_prot, VM_INHERIT_NONE); + if (ret != KERN_SUCCESS) + throw std::bad_alloc{}; + + mprotect(m_xmem, size, PROT_READ | PROT_EXEC); +#else +# if defined(__OpenBSD__) + char tmpl[] = "oaknut_dual_code_block.XXXXXXXXXX"; + fd = shm_mkstemp(tmpl); + if 
(fd < 0) + throw std::bad_alloc{}; + shm_unlink(tmpl); +# else + fd = memfd_create("oaknut_dual_code_block", 0); + if (fd < 0) + throw std::bad_alloc{}; +# endif + + int ret = ftruncate(fd, size); + if (ret != 0) + throw std::bad_alloc{}; + + m_wmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + m_xmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + + if (m_wmem == MAP_FAILED || m_xmem == MAP_FAILED) + throw std::bad_alloc{}; +#endif + } + + ~DualCodeBlock() + { +#if defined(_WIN32) + VirtualFree((void*)m_xmem, 0, MEM_RELEASE); +#elif defined(__APPLE__) +#else + munmap(m_wmem, m_size); + munmap(m_xmem, m_size); + close(fd); +#endif + } + + DualCodeBlock(const DualCodeBlock&) = delete; + DualCodeBlock& operator=(const DualCodeBlock&) = delete; + DualCodeBlock(DualCodeBlock&&) = delete; + DualCodeBlock& operator=(DualCodeBlock&&) = delete; + + /// Pointer to executable mirror of memory (permissions: R-X) + std::uint32_t* xptr() const + { + return m_xmem; + } + + /// Pointer to writeable mirror of memory (permissions: RW-) + std::uint32_t* wptr() const + { + return m_wmem; + } + + /// Invalidate should be used with executable memory pointers. 
+ void invalidate(std::uint32_t* mem, std::size_t size) + { +#if defined(__APPLE__) + sys_icache_invalidate(mem, size); +#elif defined(_WIN32) + FlushInstructionCache(GetCurrentProcess(), mem, size); +#else + static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000; + + std::uint64_t ctr; + __asm__ volatile("mrs %0, ctr_el0" + : "=r"(ctr)); + + const std::size_t isize = icache_line_size = std::min(icache_line_size, 4 << ((ctr >> 0) & 0xf)); + const std::size_t dsize = dcache_line_size = std::min(dcache_line_size, 4 << ((ctr >> 16) & 0xf)); + + const std::uintptr_t end = (std::uintptr_t)mem + size; + + for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) { + __asm__ volatile("dc cvau, %0" + : + : "r"(addr) + : "memory"); + } + __asm__ volatile("dsb ish\n" + : + : + : "memory"); + + for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) { + __asm__ volatile("ic ivau, %0" + : + : "r"(addr) + : "memory"); + } + __asm__ volatile("dsb ish\nisb\n" + : + : + : "memory"); +#endif + } + + void invalidate_all() + { + invalidate(m_xmem, m_size); + } + +protected: +#if !defined(_WIN32) && !defined(__APPLE__) + int fd = -1; +#endif + std::uint32_t* m_xmem = nullptr; + std::uint32_t* m_wmem = nullptr; + std::size_t m_size = 0; +}; + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/cpu_feature.hpp b/include/oaknut/feature_detection/cpu_feature.hpp new file mode 100644 index 00000000..9f70c5b8 --- /dev/null +++ b/include/oaknut/feature_detection/cpu_feature.hpp @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +#if defined(__cpp_lib_constexpr_bitset) && __cpp_lib_constexpr_bitset >= 202207L +# define OAKNUT_CPU_FEATURES_CONSTEXPR constexpr +#else +# define OAKNUT_CPU_FEATURES_CONSTEXPR +#endif + +namespace oaknut { + +// NOTE: This file contains code that can be 
compiled on non-arm64 systems. +// For run-time CPU feature detection, include feature_detection.hpp + +enum class CpuFeature { +#define OAKNUT_CPU_FEATURE(name) name, +#include "oaknut/impl/cpu_feature.inc.hpp" +#undef OAKNUT_CPU_FEATURE +}; + +constexpr std::size_t cpu_feature_count = 0 +#define OAKNUT_CPU_FEATURE(name) +1 +#include "oaknut/impl/cpu_feature.inc.hpp" +#undef OAKNUT_CPU_FEATURE + ; + +class CpuFeatures final { +public: + constexpr CpuFeatures() = default; + + OAKNUT_CPU_FEATURES_CONSTEXPR explicit CpuFeatures(std::initializer_list features) + { + for (CpuFeature f : features) { + m_bitset.set(static_cast(f)); + } + } + + constexpr bool has(CpuFeature feature) const + { + if (static_cast(feature) >= cpu_feature_count) + return false; + return m_bitset[static_cast(feature)]; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator&=(const CpuFeatures& other) noexcept + { + m_bitset &= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator|=(const CpuFeatures& other) noexcept + { + m_bitset |= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator^=(const CpuFeatures& other) noexcept + { + m_bitset ^= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator~() const noexcept + { + CpuFeatures result; + result.m_bitset = ~m_bitset; + return result; + } + +private: + using bitset = std::bitset; + + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept; + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept; + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept; + + bitset m_bitset; +}; + +OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset & b.m_bitset; + return 
result; +} + +OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset | b.m_bitset; + return result; +} + +OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset ^ b.m_bitset; + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection.hpp b/include/oaknut/feature_detection/feature_detection.hpp new file mode 100644 index 00000000..1961864d --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection.hpp @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#if defined(__APPLE__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0 +# include "oaknut/feature_detection/feature_detection_apple.hpp" +#elif defined(__FreeBSD__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1 +# include "oaknut/feature_detection/feature_detection_freebsd.hpp" +#elif defined(__linux__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1 +# include "oaknut/feature_detection/feature_detection_linux.hpp" +#elif defined(__NetBSD__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2 +# include "oaknut/feature_detection/feature_detection_netbsd.hpp" +#elif defined(__OpenBSD__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1 +# include "oaknut/feature_detection/feature_detection_openbsd.hpp" +#elif defined(_WIN32) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2 +# include "oaknut/feature_detection/feature_detection_w32.hpp" +#else +# define OAKNUT_CPU_FEATURE_DETECTION 0 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0 +# 
warning "Unsupported operating system for CPU feature detection" +# include "oaknut/feature_detection/feature_detection_generic.hpp" +#endif diff --git a/include/oaknut/feature_detection/feature_detection_apple.hpp b/include/oaknut/feature_detection/feature_detection_apple.hpp new file mode 100644 index 00000000..4c17825a --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_apple.hpp @@ -0,0 +1,112 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +// Ref: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics + +namespace detail { + +inline bool detect_feature(const char* const sysctl_name) +{ + int result = 0; + std::size_t result_size = sizeof(result); + if (::sysctlbyname(sysctl_name, &result, &result_size, nullptr, 0) == 0) { + return result != 0; + } + return false; +} + +} // namespace detail + +inline CpuFeatures detect_features_via_sysctlbyname() +{ + CpuFeatures result; + + if (detail::detect_feature("hw.optional.AdvSIMD") || detail::detect_feature("hw.optional.neon")) + result |= CpuFeatures{CpuFeature::ASIMD}; + if (detail::detect_feature("hw.optional.floatingpoint")) + result |= CpuFeatures{CpuFeature::FP}; + if (detail::detect_feature("hw.optional.AdvSIMD_HPFPCvt") || detail::detect_feature("hw.optional.neon_hpfp")) + result |= CpuFeatures{CpuFeature::FP16Conv}; + if (detail::detect_feature("hw.optional.arm.FEAT_BF16")) + result |= CpuFeatures{CpuFeature::BF16}; + if (detail::detect_feature("hw.optional.arm.FEAT_DotProd")) + result |= CpuFeatures{CpuFeature::DotProd}; + if (detail::detect_feature("hw.optional.arm.FEAT_FCMA") || detail::detect_feature("hw.optional.armv8_3_compnum")) + result |= CpuFeatures{CpuFeature::FCMA}; + if 
(detail::detect_feature("hw.optional.arm.FEAT_FHM") || detail::detect_feature("hw.optional.armv8_2_fhm")) + result |= CpuFeatures{CpuFeature::FHM}; + if (detail::detect_feature("hw.optional.arm.FEAT_FP16") || detail::detect_feature("hw.optional.neon_fp16")) + result |= CpuFeatures{CpuFeature::FP16}; + if (detail::detect_feature("hw.optional.arm.FEAT_FRINTTS")) + result |= CpuFeatures{CpuFeature::FRINTTS}; + if (detail::detect_feature("hw.optional.arm.FEAT_I8MM")) + result |= CpuFeatures{CpuFeature::I8MM}; + if (detail::detect_feature("hw.optional.arm.FEAT_JSCVT")) + result |= CpuFeatures{CpuFeature::JSCVT}; + if (detail::detect_feature("hw.optional.arm.FEAT_RDM")) + result |= CpuFeatures{CpuFeature::RDM}; + if (detail::detect_feature("hw.optional.arm.FEAT_FlagM")) + result |= CpuFeatures{CpuFeature::FlagM}; + if (detail::detect_feature("hw.optional.arm.FEAT_FlagM2")) + result |= CpuFeatures{CpuFeature::FlagM2}; + if (detail::detect_feature("hw.optional.armv8_crc32")) + result |= CpuFeatures{CpuFeature::CRC32}; + if (detail::detect_feature("hw.optional.arm.FEAT_LRCPC")) + result |= CpuFeatures{CpuFeature::LRCPC}; + if (detail::detect_feature("hw.optional.arm.FEAT_LRCPC2")) + result |= CpuFeatures{CpuFeature::LRCPC2}; + if (detail::detect_feature("hw.optional.arm.FEAT_LSE") || detail::detect_feature("hw.optional.armv8_1_atomics")) + result |= CpuFeatures{CpuFeature::LSE}; + if (detail::detect_feature("hw.optional.arm.FEAT_LSE2")) + result |= CpuFeatures{CpuFeature::LSE2}; + if (detail::detect_feature("hw.optional.arm.FEAT_AES")) + result |= CpuFeatures{CpuFeature::AES}; + if (detail::detect_feature("hw.optional.arm.FEAT_PMULL")) + result |= CpuFeatures{CpuFeature::PMULL}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA1")) + result |= CpuFeatures{CpuFeature::SHA1}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA256")) + result |= CpuFeatures{CpuFeature::SHA256}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA512") || 
detail::detect_feature("hw.optional.armv8_2_sha512")) + result |= CpuFeatures{CpuFeature::SHA512}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA3") || detail::detect_feature("hw.optional.armv8_2_sha3")) + result |= CpuFeatures{CpuFeature::SHA3}; + if (detail::detect_feature("hw.optional.arm.FEAT_BTI")) + result |= CpuFeatures{CpuFeature::BTI}; + if (detail::detect_feature("hw.optional.arm.FEAT_DPB")) + result |= CpuFeatures{CpuFeature::DPB}; + if (detail::detect_feature("hw.optional.arm.FEAT_DPB2")) + result |= CpuFeatures{CpuFeature::DPB2}; + if (detail::detect_feature("hw.optional.arm.FEAT_ECV")) + result |= CpuFeatures{CpuFeature::ECV}; + if (detail::detect_feature("hw.optional.arm.FEAT_SB")) + result |= CpuFeatures{CpuFeature::SB}; + if (detail::detect_feature("hw.optional.arm.FEAT_SSBS")) + result |= CpuFeatures{CpuFeature::SSBS}; + + return result; +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_sysctlbyname(); +} + +inline std::optional read_id_registers() +{ + return std::nullopt; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_freebsd.hpp b/include/oaknut/feature_detection/feature_detection_freebsd.hpp new file mode 100644 index 00000000..efb3c669 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_freebsd.hpp @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +#include +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/id_registers.hpp" +#include "oaknut/feature_detection/read_id_registers_directly.hpp" + +#ifndef AT_HWCAP +# define AT_HWCAP 16 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +#if __FreeBSD_version < 1300114 +# error "Incompatible ABI change (incorrect HWCAP definitions on earlier FreeBSD versions)" +#endif + +namespace 
oaknut { + +namespace detail { + +inline unsigned long getauxval(int aux) +{ + unsigned long result = 0; + if (::elf_aux_info(aux, &result, static_cast(sizeof result)) == 0) { + return result; + } + return 0; +} + +} // namespace detail + +inline CpuFeatures detect_features_via_hwcap() +{ + const unsigned long hwcap = detail::getauxval(AT_HWCAP); + const unsigned long hwcap2 = detail::getauxval(AT_HWCAP2); + return detect_features_via_hwcap(hwcap, hwcap2); +} + +inline std::optional read_id_registers() +{ + // HWCAP_CPUID is falsely not set on many FreeBSD kernel versions, + // so we don't bother checking it. + return id::read_id_registers_directly(); +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_hwcap(); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_generic.hpp b/include/oaknut/feature_detection/feature_detection_generic.hpp new file mode 100644 index 00000000..405a9b6a --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_generic.hpp @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +inline CpuFeatures detect_features() +{ + return CpuFeatures{CpuFeature::FP, CpuFeature::ASIMD}; +} + +inline std::optional read_id_registers() +{ + return std::nullopt; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_hwcaps.hpp b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp new file mode 100644 index 00000000..09855258 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" + +namespace oaknut { + 
+namespace detail { + +template +constexpr bool bit_test(unsigned long value) +{ + return (((value >> bits) & 1) && ...); +} + +} // namespace detail + +inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long hwcap2) +{ + CpuFeatures result; + +#define OAKNUT_DETECT_CAP(FEAT, ...) \ + if (detail::bit_test<__VA_ARGS__>(hwcap)) { \ + result |= CpuFeatures{CpuFeature::FEAT}; \ + } +#define OAKNUT_DETECT_CAP2(FEAT, ...) \ + if (detail::bit_test<__VA_ARGS__>(hwcap2)) { \ + result |= CpuFeatures{CpuFeature::FEAT}; \ + } + + OAKNUT_DETECT_CAP(FP, 0) // HWCAP_FP + OAKNUT_DETECT_CAP(ASIMD, 1) // HWCAP_ASIMD + // HWCAP_EVTSTRM (2) + OAKNUT_DETECT_CAP(AES, 3) // HWCAP_AES + OAKNUT_DETECT_CAP(PMULL, 4) // HWCAP_PMULL + OAKNUT_DETECT_CAP(SHA1, 5) // HWCAP_SHA1 + OAKNUT_DETECT_CAP(SHA256, 6) // HWCAP_SHA2 + OAKNUT_DETECT_CAP(CRC32, 7) // HWCAP_CRC32 + OAKNUT_DETECT_CAP(LSE, 8) // HWCAP_ATOMICS + OAKNUT_DETECT_CAP(FP16Conv, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP + OAKNUT_DETECT_CAP(FP16, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP + // HWCAP_CPUID (11) + OAKNUT_DETECT_CAP(RDM, 12) // HWCAP_ASIMDRDM + OAKNUT_DETECT_CAP(JSCVT, 13) // HWCAP_JSCVT + OAKNUT_DETECT_CAP(FCMA, 14) // HWCAP_FCMA + OAKNUT_DETECT_CAP(LRCPC, 15) // HWCAP_LRCPC + OAKNUT_DETECT_CAP(DPB, 16) // HWCAP_DCPOP + OAKNUT_DETECT_CAP(SHA3, 17) // HWCAP_SHA3 + OAKNUT_DETECT_CAP(SM3, 18) // HWCAP_SM3 + OAKNUT_DETECT_CAP(SM4, 19) // HWCAP_SM4 + OAKNUT_DETECT_CAP(DotProd, 20) // HWCAP_ASIMDDP + OAKNUT_DETECT_CAP(SHA512, 21) // HWCAP_SHA512 + OAKNUT_DETECT_CAP(SVE, 22) // HWCAP_SVE + OAKNUT_DETECT_CAP(FHM, 23) // HWCAP_ASIMDFHM + OAKNUT_DETECT_CAP(DIT, 24) // HWCAP_DIT + OAKNUT_DETECT_CAP(LSE2, 25) // HWCAP_USCAT + OAKNUT_DETECT_CAP(LRCPC2, 26) // HWCAP_ILRCPC + OAKNUT_DETECT_CAP(FlagM, 27) // HWCAP_FLAGM + OAKNUT_DETECT_CAP(SSBS, 28) // HWCAP_SSBS + OAKNUT_DETECT_CAP(SB, 29) // HWCAP_SB + OAKNUT_DETECT_CAP(PACA, 30) // HWCAP_PACA + OAKNUT_DETECT_CAP(PACG, 31) // HWCAP_PACG + + OAKNUT_DETECT_CAP2(DPB2, 0) 
// HWCAP2_DCPODP + OAKNUT_DETECT_CAP2(SVE2, 1) // HWCAP2_SVE2 + OAKNUT_DETECT_CAP2(SVE_AES, 2) // HWCAP2_SVEAES + OAKNUT_DETECT_CAP2(SVE_PMULL128, 3) // HWCAP2_SVEPMULL + OAKNUT_DETECT_CAP2(SVE_BITPERM, 4) // HWCAP2_SVEBITPERM + OAKNUT_DETECT_CAP2(SVE_SHA3, 5) // HWCAP2_SVESHA3 + OAKNUT_DETECT_CAP2(SVE_SM4, 6) // HWCAP2_SVESM4 + OAKNUT_DETECT_CAP2(FlagM2, 7) // HWCAP2_FLAGM2 + OAKNUT_DETECT_CAP2(FRINTTS, 8) // HWCAP2_FRINT + OAKNUT_DETECT_CAP2(SVE_I8MM, 9) // HWCAP2_SVEI8MM + OAKNUT_DETECT_CAP2(SVE_F32MM, 10) // HWCAP2_SVEF32MM + OAKNUT_DETECT_CAP2(SVE_F64MM, 11) // HWCAP2_SVEF64MM + OAKNUT_DETECT_CAP2(SVE_BF16, 12) // HWCAP2_SVEBF16 + OAKNUT_DETECT_CAP2(I8MM, 13) // HWCAP2_I8MM + OAKNUT_DETECT_CAP2(BF16, 14) // HWCAP2_BF16 + OAKNUT_DETECT_CAP2(DGH, 15) // HWCAP2_DGH + OAKNUT_DETECT_CAP2(RNG, 16) // HWCAP2_RNG + OAKNUT_DETECT_CAP2(BTI, 17) // HWCAP2_BTI + OAKNUT_DETECT_CAP2(MTE, 18) // HWCAP2_MTE + OAKNUT_DETECT_CAP2(ECV, 19) // HWCAP2_ECV + OAKNUT_DETECT_CAP2(AFP, 20) // HWCAP2_AFP + OAKNUT_DETECT_CAP2(RPRES, 21) // HWCAP2_RPRES + OAKNUT_DETECT_CAP2(MTE3, 22) // HWCAP2_MTE3 + OAKNUT_DETECT_CAP2(SME, 23) // HWCAP2_SME + OAKNUT_DETECT_CAP2(SME_I16I64, 24) // HWCAP2_SME_I16I64 + OAKNUT_DETECT_CAP2(SME_F64F64, 25) // HWCAP2_SME_F64F64 + OAKNUT_DETECT_CAP2(SME_I8I32, 26) // HWCAP2_SME_I8I32 + OAKNUT_DETECT_CAP2(SME_F16F32, 27) // HWCAP2_SME_F16F32 + OAKNUT_DETECT_CAP2(SME_B16F32, 28) // HWCAP2_SME_B16F32 + OAKNUT_DETECT_CAP2(SME_F32F32, 29) // HWCAP2_SME_F32F32 + OAKNUT_DETECT_CAP2(SME_FA64, 30) // HWCAP2_SME_FA64 + OAKNUT_DETECT_CAP2(WFxT, 31) // HWCAP2_WFXT + OAKNUT_DETECT_CAP2(EBF16, 32) // HWCAP2_EBF16 + OAKNUT_DETECT_CAP2(SVE_EBF16, 33) // HWCAP2_SVE_EBF16 + OAKNUT_DETECT_CAP2(CSSC, 34) // HWCAP2_CSSC + OAKNUT_DETECT_CAP2(RPRFM, 35) // HWCAP2_RPRFM + OAKNUT_DETECT_CAP2(SVE2p1, 36) // HWCAP2_SVE2P1 + OAKNUT_DETECT_CAP2(SME2, 37) // HWCAP2_SME2 + OAKNUT_DETECT_CAP2(SME2p1, 38) // HWCAP2_SME2P1 + OAKNUT_DETECT_CAP2(SME_I16I32, 39) // HWCAP2_SME_I16I32 + 
OAKNUT_DETECT_CAP2(SME_BI32I32, 40) // HWCAP2_SME_BI32I32 + OAKNUT_DETECT_CAP2(SME_B16B16, 41) // HWCAP2_SME_B16B16 + OAKNUT_DETECT_CAP2(SME_F16F16, 42) // HWCAP2_SME_F16F16 + OAKNUT_DETECT_CAP2(MOPS, 43) // HWCAP2_MOPS + OAKNUT_DETECT_CAP2(HBC, 44) // HWCAP2_HBC + +#undef OAKNUT_DETECT_CAP +#undef OAKNUT_DETECT_CAP2 + + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_idregs.hpp b/include/oaknut/feature_detection/feature_detection_idregs.hpp new file mode 100644 index 00000000..c26e7a92 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_idregs.hpp @@ -0,0 +1,167 @@ +// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +CpuFeatures detect_features_via_id_registers(id::IdRegisters regs) +{ + CpuFeatures result; + + if (regs.pfr0.FP() >= 0) + result |= CpuFeatures{CpuFeature::FP}; + if (regs.pfr0.AdvSIMD() >= 0) + result |= CpuFeatures{CpuFeature::ASIMD}; + if (regs.isar0.AES() >= 1) + result |= CpuFeatures{CpuFeature::AES}; + if (regs.isar0.AES() >= 2) + result |= CpuFeatures{CpuFeature::PMULL}; + if (regs.isar0.SHA1() >= 1) + result |= CpuFeatures{CpuFeature::SHA1}; + if (regs.isar0.SHA2() >= 1) + result |= CpuFeatures{CpuFeature::SHA256}; + if (regs.isar0.CRC32() >= 1) + result |= CpuFeatures{CpuFeature::CRC32}; + if (regs.isar0.Atomic() >= 2) + result |= CpuFeatures{CpuFeature::LSE}; + if (regs.pfr0.FP() >= 1 && regs.pfr0.AdvSIMD() >= 1) + result |= CpuFeatures{CpuFeature::FP16Conv, CpuFeature::FP16}; + if (regs.isar0.RDM() >= 1) + result |= CpuFeatures{CpuFeature::RDM}; + if (regs.isar1.JSCVT() >= 1) + result |= CpuFeatures{CpuFeature::JSCVT}; + if (regs.isar1.FCMA() >= 1) + result |= CpuFeatures{CpuFeature::FCMA}; + if (regs.isar1.LRCPC() >= 1) + result |= CpuFeatures{CpuFeature::LRCPC}; + if 
(regs.isar1.DPB() >= 1) + result |= CpuFeatures{CpuFeature::DPB}; + if (regs.isar0.SHA3() >= 1) + result |= CpuFeatures{CpuFeature::SHA3}; + if (regs.isar0.SM3() >= 1) + result |= CpuFeatures{CpuFeature::SM3}; + if (regs.isar0.SM4() >= 1) + result |= CpuFeatures{CpuFeature::SM4}; + if (regs.isar0.DP() >= 1) + result |= CpuFeatures{CpuFeature::DotProd}; + if (regs.isar0.SHA2() >= 2) + result |= CpuFeatures{CpuFeature::SHA512}; + if (regs.pfr0.SVE() >= 1) + result |= CpuFeatures{CpuFeature::SVE}; + if (regs.isar0.FHM() >= 1) + result |= CpuFeatures{CpuFeature::FHM}; + if (regs.pfr0.DIT() >= 1) + result |= CpuFeatures{CpuFeature::DIT}; + if (regs.mmfr2.AT() >= 1) + result |= CpuFeatures{CpuFeature::LSE2}; + if (regs.isar1.LRCPC() >= 2) + result |= CpuFeatures{CpuFeature::LRCPC2}; + if (regs.isar0.TS() >= 1) + result |= CpuFeatures{CpuFeature::FlagM}; + if (regs.pfr1.SSBS() >= 2) + result |= CpuFeatures{CpuFeature::SSBS}; + if (regs.isar1.SB() >= 1) + result |= CpuFeatures{CpuFeature::SB}; + if (regs.isar1.APA() >= 1 || regs.isar1.API() >= 1) + result |= CpuFeatures{CpuFeature::PACA}; + if (regs.isar1.GPA() >= 1 || regs.isar1.GPI() >= 1) + result |= CpuFeatures{CpuFeature::PACG}; + if (regs.isar1.DPB() >= 2) + result |= CpuFeatures{CpuFeature::DPB2}; + if (regs.zfr0.SVEver() >= 1) + result |= CpuFeatures{CpuFeature::SVE2}; + if (regs.zfr0.AES() >= 1) + result |= CpuFeatures{CpuFeature::SVE_AES}; + if (regs.zfr0.AES() >= 2) + result |= CpuFeatures{CpuFeature::SVE_PMULL128}; + if (regs.zfr0.BitPerm() >= 1) + result |= CpuFeatures{CpuFeature::SVE_BITPERM}; + if (regs.zfr0.SHA3() >= 1) + result |= CpuFeatures{CpuFeature::SVE_SHA3}; + if (regs.zfr0.SM4() >= 1) + result |= CpuFeatures{CpuFeature::SVE_SM4}; + if (regs.isar0.TS() >= 2) + result |= CpuFeatures{CpuFeature::FlagM2}; + if (regs.isar1.FRINTTS() >= 1) + result |= CpuFeatures{CpuFeature::FRINTTS}; + if (regs.zfr0.I8MM() >= 1) + result |= CpuFeatures{CpuFeature::SVE_I8MM}; + if (regs.zfr0.F32MM() >= 1) + result |= 
CpuFeatures{CpuFeature::SVE_F32MM}; + if (regs.zfr0.F64MM() >= 1) + result |= CpuFeatures{CpuFeature::SVE_F64MM}; + if (regs.zfr0.BF16() >= 1) + result |= CpuFeatures{CpuFeature::SVE_BF16}; + if (regs.isar1.I8MM() >= 1) + result |= CpuFeatures{CpuFeature::I8MM}; + if (regs.isar1.BF16() >= 1) + result |= CpuFeatures{CpuFeature::BF16}; + if (regs.isar1.DGH() >= 1) + result |= CpuFeatures{CpuFeature::DGH}; + if (regs.isar0.RNDR() >= 1) + result |= CpuFeatures{CpuFeature::RNG}; + if (regs.pfr1.BT() >= 1) + result |= CpuFeatures{CpuFeature::BTI}; + if (regs.pfr1.MTE() >= 2) + result |= CpuFeatures{CpuFeature::MTE}; + if (regs.mmfr0.ECV() >= 1) + result |= CpuFeatures{CpuFeature::ECV}; + if (regs.mmfr1.AFP() >= 1) + result |= CpuFeatures{CpuFeature::AFP}; + if (regs.isar2.RPRES() >= 1) + result |= CpuFeatures{CpuFeature::RPRES}; + if (regs.pfr1.MTE() >= 3) + result |= CpuFeatures{CpuFeature::MTE3}; + if (regs.pfr1.SME() >= 1) + result |= CpuFeatures{CpuFeature::SME}; + if (regs.smfr0.I16I64() == 0b1111) + result |= CpuFeatures{CpuFeature::SME_I16I64}; + if (regs.smfr0.F64F64() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F64F64}; + if (regs.smfr0.I8I32() == 0b1111) + result |= CpuFeatures{CpuFeature::SME_I8I32}; + if (regs.smfr0.F16F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F16F32}; + if (regs.smfr0.B16F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_B16F32}; + if (regs.smfr0.F32F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F32F32}; + if (regs.smfr0.FA64() == 0b1) + result |= CpuFeatures{CpuFeature::SME_FA64}; + if (regs.isar2.WFxT() >= 2) + result |= CpuFeatures{CpuFeature::WFxT}; + if (regs.isar1.BF16() >= 2) + result |= CpuFeatures{CpuFeature::EBF16}; + if (regs.zfr0.BF16() >= 2) + result |= CpuFeatures{CpuFeature::SVE_EBF16}; + if (regs.isar2.CSSC() >= 1) + result |= CpuFeatures{CpuFeature::CSSC}; + if (regs.isar2.RPRFM() >= 1) + result |= CpuFeatures{CpuFeature::RPRFM}; + if (regs.zfr0.SVEver() >= 2) + result |= 
CpuFeatures{CpuFeature::SVE2p1}; + if (regs.smfr0.SMEver() >= 1) + result |= CpuFeatures{CpuFeature::SME2}; + if (regs.smfr0.SMEver() >= 2) + result |= CpuFeatures{CpuFeature::SME2p1}; + if (regs.smfr0.I16I32() == 0b0101) + result |= CpuFeatures{CpuFeature::SME_I16I32}; + if (regs.smfr0.BI32I32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_BI32I32}; + if (regs.smfr0.B16B16() == 0b1) + result |= CpuFeatures{CpuFeature::SME_B16B16}; + if (regs.smfr0.F16F16() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F16F16}; + if (regs.isar2.MOPS() >= 1) + result |= CpuFeatures{CpuFeature::MOPS}; + if (regs.isar2.BC() >= 1) + result |= CpuFeatures{CpuFeature::HBC}; + + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_linux.hpp b/include/oaknut/feature_detection/feature_detection_linux.hpp new file mode 100644 index 00000000..6310eaca --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_linux.hpp @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/id_registers.hpp" +#include "oaknut/feature_detection/read_id_registers_directly.hpp" + +#ifndef AT_HWCAP +# define AT_HWCAP 16 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +namespace oaknut { + +inline CpuFeatures detect_features_via_hwcap() +{ + const unsigned long hwcap = ::getauxval(AT_HWCAP); + const unsigned long hwcap2 = ::getauxval(AT_HWCAP2); + return detect_features_via_hwcap(hwcap, hwcap2); +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_hwcap(); +} + +inline std::optional read_id_registers() +{ + constexpr unsigned long hwcap_cpuid = (1 << 11); + if (::getauxval(AT_HWCAP) & hwcap_cpuid) { + return id::read_id_registers_directly(); + } + return std::nullopt; 
+} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_netbsd.hpp b/include/oaknut/feature_detection/feature_detection_netbsd.hpp new file mode 100644 index 00000000..cdb1deb1 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_netbsd.hpp @@ -0,0 +1,81 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/feature_detection_idregs.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +inline std::optional read_id_registers(std::size_t core_index) +{ + const std::string path = "machdep.cpu" + std::to_string(core_index) + ".cpu_id"; + + aarch64_sysctl_cpu_id id; + std::size_t id_len = sizeof id; + + if (sysctlbyname(path.c_str(), &id, &id_len, nullptr, 0) < 0) + return std::nullopt; + + return id::IdRegisters{ + id.ac_midr, + id::Pfr0Register{id.ac_aa64pfr0}, + id::Pfr1Register{id.ac_aa64pfr1}, + id::Pfr2Register{0}, + id::Zfr0Register{id.ac_aa64zfr0}, + id::Smfr0Register{0}, + id::Isar0Register{id.ac_aa64isar0}, + id::Isar1Register{id.ac_aa64isar1}, + id::Isar2Register{0}, + id::Isar3Register{0}, + id::Mmfr0Register{id.ac_aa64mmfr0}, + id::Mmfr1Register{id.ac_aa64mmfr1}, + id::Mmfr2Register{id.ac_aa64mmfr2}, + id::Mmfr3Register{0}, + id::Mmfr4Register{0}, + }; +} + +inline std::size_t get_core_count() +{ + int result = 0; + size_t result_size = sizeof(result); + const std::array mib{CTL_HW, HW_NCPU}; + if (sysctl(mib.data(), mib.size(), &result, &result_size, nullptr, 0) < 0) + return 0; + return result; +} + +inline CpuFeatures detect_features() +{ + std::optional result; + + const std::size_t core_count = get_core_count(); + for (std::size_t core_index = 0; core_index < core_count; 
core_index++) { + if (const std::optional id_regs = read_id_registers(core_index)) { + const CpuFeatures current_features = detect_features_via_id_registers(*id_regs); + if (result) { + result = *result & current_features; + } else { + result = current_features; + } + } + } + + return result.value_or(CpuFeatures{}); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_openbsd.hpp b/include/oaknut/feature_detection/feature_detection_openbsd.hpp new file mode 100644 index 00000000..8514a2bf --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_openbsd.hpp @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/feature_detection_idregs.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +namespace detail { + +inline std::uint64_t read_id_register(int index) +{ + uint64_t result = 0; + size_t result_size = sizeof(result); + std::array mib{CTL_MACHDEP, index}; + if (sysctl(mib.data(), mib.size(), &result, &result_size, nullptr, 0) < 0) + return 0; + return result; +} + +} // namespace detail + +inline std::optional read_id_registers() +{ + // See OpenBSD source: sys/arch/arm64/include/cpu.h + + return id::IdRegisters{ + std::nullopt, // No easy way of getting MIDR_EL1 other than reading /proc/cpu + id::Pfr0Register{detail::read_id_register(8)}, // CPU_ID_AA64PFR0 + id::Pfr1Register{detail::read_id_register(9)}, // CPU_ID_AA64PFR1 + id::Pfr2Register{0}, + id::Zfr0Register{detail::read_id_register(11)}, // CPU_ID_AA64ZFR0 + id::Smfr0Register{detail::read_id_register(10)}, // CPU_ID_AA64SMFR0 + id::Isar0Register{detail::read_id_register(2)}, // CPU_ID_AA64ISAR0 + 
id::Isar1Register{detail::read_id_register(3)}, // CPU_ID_AA64ISAR1 + id::Isar2Register{detail::read_id_register(4)}, // CPU_ID_AA64ISAR2 + id::Isar3Register{0}, + id::Mmfr0Register{detail::read_id_register(5)}, // CPU_ID_AA64MMFR0 + id::Mmfr1Register{detail::read_id_register(6)}, // CPU_ID_AA64MMFR1 + id::Mmfr2Register{detail::read_id_register(7)}, // CPU_ID_AA64MMFR2 + id::Mmfr3Register{0}, + id::Mmfr4Register{0}, + }; +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_id_registers(*read_id_registers()); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_w32.hpp b/include/oaknut/feature_detection/feature_detection_w32.hpp new file mode 100644 index 00000000..366a2600 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_w32.hpp @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#include + +#include +#include +#include + +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +namespace detail { + +inline std::optional read_registry_hklm(const std::string& subkey, const std::string& name) +{ + std::uint64_t value; + DWORD value_len = sizeof(value); + if (::RegGetValueA(HKEY_LOCAL_MACHINE, subkey.c_str(), name.c_str(), RRF_RT_REG_QWORD, nullptr, &value, &value_len) == ERROR_SUCCESS) { + return value; + } + return std::nullopt; +} + +inline std::uint64_t read_id_register(std::size_t core_index, const std::string& name) +{ + return read_registry_hklm("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\" + std::to_string(core_index), "CP " + name).value_or(0); +} + +} // namespace detail + +// Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + +inline CpuFeatures 
detect_features_via_IsProcessorFeaturePresent() +{ + CpuFeatures result; + + if (::IsProcessorFeaturePresent(30)) // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::AES, CpuFeature::PMULL, CpuFeature::SHA1, CpuFeature::SHA256}; + if (::IsProcessorFeaturePresent(31)) // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::CRC32}; + if (::IsProcessorFeaturePresent(34)) // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::LSE}; + if (::IsProcessorFeaturePresent(43)) // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::DotProd}; + if (::IsProcessorFeaturePresent(44)) // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::JSCVT}; + if (::IsProcessorFeaturePresent(45)) // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::LRCPC}; + + return result; +} + +inline CpuFeatures detect_features() +{ + CpuFeatures result{CpuFeature::FP, CpuFeature::ASIMD}; + result |= detect_features_via_IsProcessorFeaturePresent(); + return result; +} + +inline std::size_t get_core_count() +{ + ::SYSTEM_INFO sys_info; + ::GetSystemInfo(&sys_info); + return sys_info.dwNumberOfProcessors; +} + +inline std::optional read_id_registers(std::size_t core_index) +{ + return id::IdRegisters{ + detail::read_id_register(core_index, "4000"), + id::Pfr0Register{detail::read_id_register(core_index, "4020")}, + id::Pfr1Register{detail::read_id_register(core_index, "4021")}, + id::Pfr2Register{detail::read_id_register(core_index, "4022")}, + id::Zfr0Register{detail::read_id_register(core_index, "4024")}, + id::Smfr0Register{detail::read_id_register(core_index, "4025")}, + id::Isar0Register{detail::read_id_register(core_index, "4030")}, + id::Isar1Register{detail::read_id_register(core_index, "4031")}, + id::Isar2Register{detail::read_id_register(core_index, "4032")}, + id::Isar3Register{detail::read_id_register(core_index, "4033")}, + 
id::Mmfr0Register{detail::read_id_register(core_index, "4038")}, + id::Mmfr1Register{detail::read_id_register(core_index, "4039")}, + id::Mmfr2Register{detail::read_id_register(core_index, "403A")}, + id::Mmfr3Register{detail::read_id_register(core_index, "403B")}, + id::Mmfr4Register{detail::read_id_register(core_index, "403C")}, + }; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/id_registers.hpp b/include/oaknut/feature_detection/id_registers.hpp new file mode 100644 index 00000000..fa779618 --- /dev/null +++ b/include/oaknut/feature_detection/id_registers.hpp @@ -0,0 +1,318 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +namespace oaknut::id { + +namespace detail { + +template +constexpr unsigned extract_bit(std::uint64_t value) +{ + return (value >> lsb) & 1; +} + +template +constexpr unsigned extract_field(std::uint64_t value) +{ + return (value >> lsb) & 0xf; +} + +template +constexpr signed extract_signed_field(std::uint64_t value) +{ + return static_cast(static_cast(value << (60 - lsb)) >> 60); +} + +} // namespace detail + +struct Pfr0Register { + std::uint64_t value; + + constexpr signed FP() const { return detail::extract_signed_field<16>(value); } + constexpr signed AdvSIMD() const { return detail::extract_signed_field<20>(value); } + constexpr unsigned GIC() const { return detail::extract_field<24>(value); } + constexpr unsigned RAS() const { return detail::extract_field<28>(value); } + constexpr unsigned SVE() const { return detail::extract_field<32>(value); } + constexpr unsigned SEL2() const { return detail::extract_field<36>(value); } + constexpr unsigned MPAM() const { return detail::extract_field<40>(value); } + constexpr unsigned AMU() const { return detail::extract_field<44>(value); } + constexpr unsigned DIT() const { return detail::extract_field<48>(value); } + constexpr unsigned RME() const { return 
detail::extract_field<52>(value); } + constexpr unsigned CSV2() const { return detail::extract_field<56>(value); } + constexpr unsigned CSV3() const { return detail::extract_field<60>(value); } +}; + +struct Pfr1Register { + std::uint64_t value; + + constexpr unsigned BT() const { return detail::extract_field<0>(value); } + constexpr unsigned SSBS() const { return detail::extract_field<4>(value); } + constexpr unsigned MTE() const { return detail::extract_field<8>(value); } + constexpr unsigned RAS_frac() const { return detail::extract_field<12>(value); } + constexpr unsigned MPAM_frac() const { return detail::extract_field<16>(value); } + // [20:23] - reserved + constexpr unsigned SME() const { return detail::extract_field<24>(value); } + constexpr unsigned RNDR_trap() const { return detail::extract_field<28>(value); } + constexpr unsigned CSV2_frac() const { return detail::extract_field<32>(value); } + constexpr unsigned NMI() const { return detail::extract_field<36>(value); } + constexpr unsigned MTE_frac() const { return detail::extract_field<40>(value); } + constexpr unsigned GCS() const { return detail::extract_field<44>(value); } + constexpr unsigned THE() const { return detail::extract_field<48>(value); } + constexpr unsigned MTEX() const { return detail::extract_field<52>(value); } + constexpr unsigned DF2() const { return detail::extract_field<56>(value); } + constexpr unsigned PFAR() const { return detail::extract_field<60>(value); } +}; + +struct Pfr2Register { + std::uint64_t value; + + constexpr unsigned MTEPERM() const { return detail::extract_field<0>(value); } + constexpr unsigned MTESTOREONLY() const { return detail::extract_field<4>(value); } + constexpr unsigned MTEFAR() const { return detail::extract_field<8>(value); } + // [12:31] reserved + constexpr unsigned FPMR() const { return detail::extract_field<32>(value); } + // [36:63] reserved +}; + +struct Zfr0Register { + std::uint64_t value; + + constexpr unsigned SVEver() const { return 
detail::extract_field<0>(value); } + constexpr unsigned AES() const { return detail::extract_field<4>(value); } + // [8:15] reserved + constexpr unsigned BitPerm() const { return detail::extract_field<16>(value); } + constexpr unsigned BF16() const { return detail::extract_field<20>(value); } + constexpr unsigned B16B16() const { return detail::extract_field<24>(value); } + // [28:31] reserved + constexpr unsigned SHA3() const { return detail::extract_field<32>(value); } + // [36:39] reserved + constexpr unsigned SM4() const { return detail::extract_field<40>(value); } + constexpr unsigned I8MM() const { return detail::extract_field<44>(value); } + // [48:51] reserved + constexpr unsigned F32MM() const { return detail::extract_field<52>(value); } + constexpr unsigned F64MM() const { return detail::extract_field<56>(value); } + // [60:63] reserved +}; + +struct Smfr0Register { + std::uint64_t value; + + // [0:27] reserved + constexpr unsigned SF8DP2() const { return detail::extract_bit<28>(value); } + constexpr unsigned SF8DP4() const { return detail::extract_bit<29>(value); } + constexpr unsigned SF8FMA() const { return detail::extract_bit<30>(value); } + // [31] reserved + constexpr unsigned F32F32() const { return detail::extract_bit<32>(value); } + constexpr unsigned BI32I32() const { return detail::extract_bit<33>(value); } + constexpr unsigned B16F32() const { return detail::extract_bit<34>(value); } + constexpr unsigned F16F32() const { return detail::extract_bit<35>(value); } + constexpr unsigned I8I32() const { return detail::extract_field<36>(value); } + constexpr unsigned F8F32() const { return detail::extract_bit<40>(value); } + constexpr unsigned F8F16() const { return detail::extract_bit<41>(value); } + constexpr unsigned F16F16() const { return detail::extract_bit<42>(value); } + constexpr unsigned B16B16() const { return detail::extract_bit<43>(value); } + constexpr unsigned I16I32() const { return detail::extract_field<44>(value); } + constexpr 
unsigned F64F64() const { return detail::extract_bit<48>(value); } + // [49:51] reserved + constexpr unsigned I16I64() const { return detail::extract_field<52>(value); } + constexpr unsigned SMEver() const { return detail::extract_field<56>(value); } + constexpr unsigned LUTv2() const { return detail::extract_bit<60>(value); } + // [61:62] reserved + constexpr unsigned FA64() const { return detail::extract_bit<63>(value); } +}; + +struct Isar0Register { + std::uint64_t value; + + // [0:3] reserved + constexpr unsigned AES() const { return detail::extract_field<4>(value); } + constexpr unsigned SHA1() const { return detail::extract_field<8>(value); } + constexpr unsigned SHA2() const { return detail::extract_field<12>(value); } + constexpr unsigned CRC32() const { return detail::extract_field<16>(value); } + constexpr unsigned Atomic() const { return detail::extract_field<20>(value); } + constexpr unsigned TME() const { return detail::extract_field<24>(value); } + constexpr unsigned RDM() const { return detail::extract_field<28>(value); } + constexpr unsigned SHA3() const { return detail::extract_field<32>(value); } + constexpr unsigned SM3() const { return detail::extract_field<36>(value); } + constexpr unsigned SM4() const { return detail::extract_field<40>(value); } + constexpr unsigned DP() const { return detail::extract_field<44>(value); } + constexpr unsigned FHM() const { return detail::extract_field<48>(value); } + constexpr unsigned TS() const { return detail::extract_field<52>(value); } + constexpr unsigned TLB() const { return detail::extract_field<56>(value); } + constexpr unsigned RNDR() const { return detail::extract_field<60>(value); } +}; + +struct Isar1Register { + std::uint64_t value; + + constexpr unsigned DPB() const { return detail::extract_field<0>(value); } + constexpr unsigned APA() const { return detail::extract_field<4>(value); } + constexpr unsigned API() const { return detail::extract_field<8>(value); } + constexpr unsigned JSCVT() const 
{ return detail::extract_field<12>(value); } + constexpr unsigned FCMA() const { return detail::extract_field<16>(value); } + constexpr unsigned LRCPC() const { return detail::extract_field<20>(value); } + constexpr unsigned GPA() const { return detail::extract_field<24>(value); } + constexpr unsigned GPI() const { return detail::extract_field<28>(value); } + constexpr unsigned FRINTTS() const { return detail::extract_field<32>(value); } + constexpr unsigned SB() const { return detail::extract_field<36>(value); } + constexpr unsigned SPECRES() const { return detail::extract_field<40>(value); } + constexpr unsigned BF16() const { return detail::extract_field<44>(value); } + constexpr unsigned DGH() const { return detail::extract_field<48>(value); } + constexpr unsigned I8MM() const { return detail::extract_field<52>(value); } + constexpr unsigned XS() const { return detail::extract_field<56>(value); } + constexpr unsigned LS64() const { return detail::extract_field<60>(value); } +}; + +struct Isar2Register { + std::uint64_t value; + + constexpr unsigned WFxT() const { return detail::extract_field<0>(value); } + constexpr unsigned RPRES() const { return detail::extract_field<4>(value); } + constexpr unsigned GPA3() const { return detail::extract_field<8>(value); } + constexpr unsigned APA3() const { return detail::extract_field<12>(value); } + constexpr unsigned MOPS() const { return detail::extract_field<16>(value); } + constexpr unsigned BC() const { return detail::extract_field<20>(value); } + constexpr unsigned PAC_frac() const { return detail::extract_field<24>(value); } + constexpr unsigned CLRBHB() const { return detail::extract_field<28>(value); } + constexpr unsigned SYSREG_128() const { return detail::extract_field<32>(value); } + constexpr unsigned SYSINSTR_128() const { return detail::extract_field<36>(value); } + constexpr unsigned PRFMSLC() const { return detail::extract_field<40>(value); } + // [44:47] reserved + constexpr unsigned RPRFM() const { 
return detail::extract_field<48>(value); } + constexpr unsigned CSSC() const { return detail::extract_field<52>(value); } + constexpr unsigned LUT() const { return detail::extract_field<56>(value); } + constexpr unsigned ATS1A() const { return detail::extract_field<60>(value); } +}; + +struct Isar3Register { + std::uint64_t value; + + constexpr unsigned CPA() const { return detail::extract_field<0>(value); } + constexpr unsigned FAMINMAX() const { return detail::extract_field<4>(value); } + constexpr unsigned TLBIW() const { return detail::extract_field<8>(value); } + // [12:63] reserved +}; + +struct Mmfr0Register { + std::uint64_t value; + + constexpr unsigned PARange() const { return detail::extract_field<0>(value); } + constexpr unsigned ASIDBits() const { return detail::extract_field<4>(value); } + constexpr unsigned BigEnd() const { return detail::extract_field<8>(value); } + constexpr unsigned SNSMem() const { return detail::extract_field<12>(value); } + constexpr unsigned BigEndEL0() const { return detail::extract_field<16>(value); } + constexpr unsigned TGran16() const { return detail::extract_field<20>(value); } + constexpr unsigned TGran64() const { return detail::extract_field<24>(value); } + constexpr unsigned TGran4() const { return detail::extract_field<28>(value); } + constexpr unsigned TGran16_2() const { return detail::extract_field<32>(value); } + constexpr unsigned TGran64_2() const { return detail::extract_field<36>(value); } + constexpr unsigned TGran4_2() const { return detail::extract_field<40>(value); } + constexpr unsigned ExS() const { return detail::extract_field<44>(value); } + // [48:55] reserved + constexpr unsigned FGT() const { return detail::extract_field<56>(value); } + constexpr unsigned ECV() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr1Register { + std::uint64_t value; + + constexpr unsigned HAFDBS() const { return detail::extract_field<0>(value); } + constexpr unsigned VMIDBits() const { return 
detail::extract_field<4>(value); } + constexpr unsigned VH() const { return detail::extract_field<8>(value); } + constexpr unsigned HPDS() const { return detail::extract_field<12>(value); } + constexpr unsigned LO() const { return detail::extract_field<16>(value); } + constexpr unsigned PAN() const { return detail::extract_field<20>(value); } + constexpr unsigned SpecSEI() const { return detail::extract_field<24>(value); } + constexpr unsigned XNX() const { return detail::extract_field<28>(value); } + constexpr unsigned TWED() const { return detail::extract_field<32>(value); } + constexpr unsigned ETS() const { return detail::extract_field<36>(value); } + constexpr unsigned HCX() const { return detail::extract_field<40>(value); } + constexpr unsigned AFP() const { return detail::extract_field<44>(value); } + constexpr unsigned nTLBPA() const { return detail::extract_field<48>(value); } + constexpr unsigned TIDCP1() const { return detail::extract_field<52>(value); } + constexpr unsigned CMOW() const { return detail::extract_field<56>(value); } + constexpr unsigned ECBHB() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr2Register { + std::uint64_t value; + + constexpr unsigned CnP() const { return detail::extract_field<0>(value); } + constexpr unsigned UAO() const { return detail::extract_field<4>(value); } + constexpr unsigned LSM() const { return detail::extract_field<8>(value); } + constexpr unsigned IESB() const { return detail::extract_field<12>(value); } + constexpr unsigned VARange() const { return detail::extract_field<16>(value); } + constexpr unsigned CCIDX() const { return detail::extract_field<20>(value); } + constexpr unsigned NV() const { return detail::extract_field<24>(value); } + constexpr unsigned ST() const { return detail::extract_field<28>(value); } + constexpr unsigned AT() const { return detail::extract_field<32>(value); } + constexpr unsigned IDS() const { return detail::extract_field<36>(value); } + constexpr unsigned 
FWB() const { return detail::extract_field<40>(value); } + // [44:47] reserved + constexpr unsigned TTL() const { return detail::extract_field<48>(value); } + constexpr unsigned BBM() const { return detail::extract_field<52>(value); } + constexpr unsigned EVT() const { return detail::extract_field<56>(value); } + constexpr unsigned E0PD() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr3Register { + std::uint64_t value; + + constexpr unsigned TCRX() const { return detail::extract_field<0>(value); } + constexpr unsigned SCTLRX() const { return detail::extract_field<4>(value); } + constexpr unsigned S1PIE() const { return detail::extract_field<8>(value); } + constexpr unsigned S2PIE() const { return detail::extract_field<12>(value); } + constexpr unsigned S1POE() const { return detail::extract_field<16>(value); } + constexpr unsigned S2POE() const { return detail::extract_field<20>(value); } + constexpr unsigned AIE() const { return detail::extract_field<24>(value); } + constexpr unsigned MEC() const { return detail::extract_field<28>(value); } + constexpr unsigned D128() const { return detail::extract_field<32>(value); } + constexpr unsigned D128_2() const { return detail::extract_field<36>(value); } + constexpr unsigned SNERR() const { return detail::extract_field<40>(value); } + constexpr unsigned ANERR() const { return detail::extract_field<44>(value); } + // [48:51] reserved + constexpr unsigned SDERR() const { return detail::extract_field<52>(value); } + constexpr unsigned ADERR() const { return detail::extract_field<56>(value); } + constexpr unsigned Spec_FPACC() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr4Register { + std::uint64_t value; + + // [0:3] reserved + constexpr unsigned EIESB() const { return detail::extract_field<4>(value); } + constexpr unsigned ASID2() const { return detail::extract_field<8>(value); } + constexpr unsigned HACDBS() const { return detail::extract_field<12>(value); } + constexpr 
unsigned FGWTE3() const { return detail::extract_field<16>(value); } + constexpr unsigned NV_frac() const { return detail::extract_field<20>(value); } + constexpr unsigned E2H0() const { return detail::extract_field<24>(value); } + // [28:35] reserved + constexpr unsigned E3DSE() const { return detail::extract_field<36>(value); } + // [40:63] reserved +}; + +struct IdRegisters { + std::optional midr; + Pfr0Register pfr0; + Pfr1Register pfr1; + Pfr2Register pfr2; + Zfr0Register zfr0; + Smfr0Register smfr0; + Isar0Register isar0; + Isar1Register isar1; + Isar2Register isar2; + Isar3Register isar3; + Mmfr0Register mmfr0; + Mmfr1Register mmfr1; + Mmfr2Register mmfr2; + Mmfr3Register mmfr3; + Mmfr4Register mmfr4; +}; + +} // namespace oaknut::id diff --git a/include/oaknut/feature_detection/read_id_registers_directly.hpp b/include/oaknut/feature_detection/read_id_registers_directly.hpp new file mode 100644 index 00000000..04db5188 --- /dev/null +++ b/include/oaknut/feature_detection/read_id_registers_directly.hpp @@ -0,0 +1,52 @@ +#include + +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut::id { + +inline IdRegisters read_id_registers_directly() +{ + std::uint64_t midr, pfr0, pfr1, pfr2, isar0, isar1, isar2, isar3, mmfr0, mmfr1, mmfr2, mmfr3, mmfr4, zfr0, smfr0; + +#define OAKNUT_READ_REGISTER(reg, var) \ + __asm__("mrs %0, " #reg \ + : "=r"(var)) + + OAKNUT_READ_REGISTER(s3_0_c0_c0_0, midr); + OAKNUT_READ_REGISTER(s3_0_c0_c4_0, pfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c4_1, pfr1); + OAKNUT_READ_REGISTER(s3_0_c0_c4_2, pfr2); + OAKNUT_READ_REGISTER(s3_0_c0_c4_4, zfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c4_5, smfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c6_0, isar0); + OAKNUT_READ_REGISTER(s3_0_c0_c6_1, isar1); + OAKNUT_READ_REGISTER(s3_0_c0_c6_2, isar2); + OAKNUT_READ_REGISTER(s3_0_c0_c6_3, isar3); + OAKNUT_READ_REGISTER(s3_0_c0_c7_0, mmfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c7_1, mmfr1); + OAKNUT_READ_REGISTER(s3_0_c0_c7_2, mmfr2); + 
OAKNUT_READ_REGISTER(s3_0_c0_c7_3, mmfr3); + OAKNUT_READ_REGISTER(s3_0_c0_c7_4, mmfr4); + +#undef OAKNUT_READ_REGISTER + + return IdRegisters{ + midr, + Pfr0Register{pfr0}, + Pfr1Register{pfr1}, + Pfr2Register{pfr2}, + Zfr0Register{zfr0}, + Smfr0Register{smfr0}, + Isar0Register{isar0}, + Isar1Register{isar1}, + Isar2Register{isar2}, + Isar3Register{isar3}, + Mmfr0Register{mmfr0}, + Mmfr1Register{mmfr1}, + Mmfr2Register{mmfr2}, + Mmfr3Register{mmfr3}, + Mmfr4Register{mmfr4}, + }; +} + +} // namespace oaknut::id diff --git a/include/oaknut/impl/arm64_encode_helpers.inc.hpp b/include/oaknut/impl/arm64_encode_helpers.inc.hpp index 3081d943..fb636b78 100644 --- a/include/oaknut/impl/arm64_encode_helpers.inc.hpp +++ b/include/oaknut/impl/arm64_encode_helpers.inc.hpp @@ -8,7 +8,7 @@ static constexpr std::uint32_t pdep(std::uint32_t val) std::uint32_t res = 0; for (std::uint32_t bb = 1; mask; bb += bb) { if (val & bb) - res |= mask & -mask; + res |= mask & (~mask + 1); mask &= mask - 1; } return res; @@ -107,6 +107,61 @@ std::uint32_t encode(List v) return encode(v.m_base); } +template +std::uint32_t encode(AddrOffset v) +{ + static_assert(std::popcount(splat) == size - align); + + const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) { + const std::ptrdiff_t diff = target_offset - current_offset; + return pdep(AddrOffset::encode(diff)); + }; + + return std::visit(detail::overloaded{ + [&](std::uint32_t encoding) -> std::uint32_t { + return pdep(encoding); + }, + [&](Label* label) -> std::uint32_t { + if (label->m_offset) { + return encode_fn(Policy::offset(), *label->m_offset); + } + + label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast(encode_fn)}); + return 0u; + }, + [&](const void* p) -> std::uint32_t { + const std::ptrdiff_t diff = reinterpret_cast(p) - Policy::template xptr(); + return pdep(AddrOffset::encode(diff)); + }, + }, + v.m_payload); +} + +template +std::uint32_t encode(PageOffset v) +{ + 
static_assert(std::popcount(splat) == size); + + const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) { + return pdep(PageOffset::encode(static_cast(current_offset), static_cast(target_offset))); + }; + + return std::visit(detail::overloaded{ + [&](Label* label) -> std::uint32_t { + if (label->m_offset) { + return encode_fn(Policy::offset(), *label->m_offset); + } + + label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast(encode_fn)}); + return 0u; + }, + [&](const void* p) -> std::uint32_t { + return pdep(PageOffset::encode(Policy::template xptr(), reinterpret_cast(p))); + }, + }, + v.m_payload); +} + #undef OAKNUT_STD_ENCODE void addsubext_lsl_correction(AddSubExt& ext, XRegSp) diff --git a/include/oaknut/impl/cpu_feature.inc.hpp b/include/oaknut/impl/cpu_feature.inc.hpp new file mode 100644 index 00000000..1f7cd879 --- /dev/null +++ b/include/oaknut/impl/cpu_feature.inc.hpp @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +OAKNUT_CPU_FEATURE(FP) +OAKNUT_CPU_FEATURE(ASIMD) +OAKNUT_CPU_FEATURE(AES) +OAKNUT_CPU_FEATURE(PMULL) +OAKNUT_CPU_FEATURE(SHA1) +OAKNUT_CPU_FEATURE(SHA256) +OAKNUT_CPU_FEATURE(CRC32) +OAKNUT_CPU_FEATURE(LSE) +OAKNUT_CPU_FEATURE(FP16Conv) +OAKNUT_CPU_FEATURE(FP16) +OAKNUT_CPU_FEATURE(RDM) +OAKNUT_CPU_FEATURE(JSCVT) +OAKNUT_CPU_FEATURE(FCMA) +OAKNUT_CPU_FEATURE(LRCPC) +OAKNUT_CPU_FEATURE(DPB) +OAKNUT_CPU_FEATURE(SHA3) +OAKNUT_CPU_FEATURE(SM3) +OAKNUT_CPU_FEATURE(SM4) +OAKNUT_CPU_FEATURE(DotProd) +OAKNUT_CPU_FEATURE(SHA512) +OAKNUT_CPU_FEATURE(SVE) +OAKNUT_CPU_FEATURE(FHM) +OAKNUT_CPU_FEATURE(DIT) +OAKNUT_CPU_FEATURE(LSE2) +OAKNUT_CPU_FEATURE(LRCPC2) +OAKNUT_CPU_FEATURE(FlagM) +OAKNUT_CPU_FEATURE(SSBS) +OAKNUT_CPU_FEATURE(SB) +OAKNUT_CPU_FEATURE(PACA) +OAKNUT_CPU_FEATURE(PACG) +OAKNUT_CPU_FEATURE(DPB2) +OAKNUT_CPU_FEATURE(SVE2) +OAKNUT_CPU_FEATURE(SVE_AES) +OAKNUT_CPU_FEATURE(SVE_PMULL128) +OAKNUT_CPU_FEATURE(SVE_BITPERM) 
+OAKNUT_CPU_FEATURE(SVE_SHA3) +OAKNUT_CPU_FEATURE(SVE_SM4) +OAKNUT_CPU_FEATURE(FlagM2) +OAKNUT_CPU_FEATURE(FRINTTS) +OAKNUT_CPU_FEATURE(SVE_I8MM) +OAKNUT_CPU_FEATURE(SVE_F32MM) +OAKNUT_CPU_FEATURE(SVE_F64MM) +OAKNUT_CPU_FEATURE(SVE_BF16) +OAKNUT_CPU_FEATURE(I8MM) +OAKNUT_CPU_FEATURE(BF16) +OAKNUT_CPU_FEATURE(DGH) +OAKNUT_CPU_FEATURE(RNG) +OAKNUT_CPU_FEATURE(BTI) +OAKNUT_CPU_FEATURE(MTE) +OAKNUT_CPU_FEATURE(ECV) +OAKNUT_CPU_FEATURE(AFP) +OAKNUT_CPU_FEATURE(RPRES) +OAKNUT_CPU_FEATURE(MTE3) +OAKNUT_CPU_FEATURE(SME) +OAKNUT_CPU_FEATURE(SME_I16I64) +OAKNUT_CPU_FEATURE(SME_F64F64) +OAKNUT_CPU_FEATURE(SME_I8I32) +OAKNUT_CPU_FEATURE(SME_F16F32) +OAKNUT_CPU_FEATURE(SME_B16F32) +OAKNUT_CPU_FEATURE(SME_F32F32) +OAKNUT_CPU_FEATURE(SME_FA64) +OAKNUT_CPU_FEATURE(WFxT) +OAKNUT_CPU_FEATURE(EBF16) +OAKNUT_CPU_FEATURE(SVE_EBF16) +OAKNUT_CPU_FEATURE(CSSC) +OAKNUT_CPU_FEATURE(RPRFM) +OAKNUT_CPU_FEATURE(SVE2p1) +OAKNUT_CPU_FEATURE(SME2) +OAKNUT_CPU_FEATURE(SME2p1) +OAKNUT_CPU_FEATURE(SME_I16I32) +OAKNUT_CPU_FEATURE(SME_BI32I32) +OAKNUT_CPU_FEATURE(SME_B16B16) +OAKNUT_CPU_FEATURE(SME_F16F16) +OAKNUT_CPU_FEATURE(MOPS) +OAKNUT_CPU_FEATURE(HBC) diff --git a/include/oaknut/impl/enum.hpp b/include/oaknut/impl/enum.hpp index 89dc9356..68448b47 100644 --- a/include/oaknut/impl/enum.hpp +++ b/include/oaknut/impl/enum.hpp @@ -85,15 +85,67 @@ enum class PstateField { }; enum class SystemReg { + AMCFGR_EL0 = 0b11'011'1101'0010'001, + AMCGCR_EL0 = 0b11'011'1101'0010'010, + AMCNTENCLR0_EL0 = 0b11'011'1101'0010'100, + AMCNTENCLR1_EL0 = 0b11'011'1101'0011'000, + AMCNTENSET0_EL0 = 0b11'011'1101'0010'101, + AMCNTENSET1_EL0 = 0b11'011'1101'0011'001, + AMCR_EL0 = 0b11'011'1101'0010'000, + AMEVCNTR0_n_EL0 = 0b11'011'1101'0100'000, // n = 0-3 + AMEVCNTR1_n_EL0 = 0b11'011'1101'1100'000, // n = 0-15 + AMEVTYPER0_n_EL0 = 0b11'011'1101'0110'000, // n = 0-3 + AMEVTYPER1_n_EL0 = 0b11'011'1101'1110'000, // n = 0-15 + AMUSERENR_EL0 = 0b11'011'1101'0010'011, CNTFRQ_EL0 = 0b11'011'1110'0000'000, + CNTP_CTL_EL0 = 
0b11'011'1110'0010'001, + CNTP_CVAL_EL0 = 0b11'011'1110'0010'010, + CNTP_TVAL_EL0 = 0b11'011'1110'0010'000, CNTPCT_EL0 = 0b11'011'1110'0000'001, + CNTV_CTL_EL0 = 0b11'011'1110'0011'001, + CNTV_CVAL_EL0 = 0b11'011'1110'0011'010, + CNTV_TVAL_EL0 = 0b11'011'1110'0011'000, + CNTVCT_EL0 = 0b11'011'1110'0000'010, CTR_EL0 = 0b11'011'0000'0000'001, + CurrentEL = 0b11'000'0100'0010'010, + DAIF = 0b11'011'0100'0010'001, + DBGDTR_EL0 = 0b10'011'0000'0100'000, + DBGDTRRX_EL0 = 0b10'011'0000'0101'000, + DBGDTRTX_EL0 = 0b10'011'0000'0101'000, DCZID_EL0 = 0b11'011'0000'0000'111, + DIT = 0b11'011'0100'0010'101, + DLR_EL0 = 0b11'011'0100'0101'001, + DSPSR_EL0 = 0b11'011'0100'0101'000, FPCR = 0b11'011'0100'0100'000, FPSR = 0b11'011'0100'0100'001, + MDCCSR_EL0 = 0b10'011'0000'0001'000, NZCV = 0b11'011'0100'0010'000, + PAN = 0b11'000'0100'0010'011, + PMCCFILTR_EL0 = 0b11'011'1110'1111'111, + PMCCNTR_EL0 = 0b11'011'1001'1101'000, + PMCEID0_EL0 = 0b11'011'1001'1100'110, + PMCEID1_EL0 = 0b11'011'1001'1100'111, + PMCNTENCLR_EL0 = 0b11'011'1001'1100'010, + PMCNTENSET_EL0 = 0b11'011'1001'1100'001, + PMCR_EL0 = 0b11'011'1001'1100'000, + PMEVCNTR_n_EL0 = 0b11'011'1110'1000'000, // n = 0-30 + PMEVTYPER_n_EL0 = 0b11'011'1110'1100'000, // n = 0-30 + PMOVSCLR_EL0 = 0b11'011'1001'1100'011, + PMOVSSET_EL0 = 0b11'011'1001'1110'011, + PMSELR_EL0 = 0b11'011'1001'1100'101, + PMSWINC_EL0 = 0b11'011'1001'1100'100, + PMUSERENR_EL0 = 0b11'011'1001'1110'000, + PMXEVCNTR_EL0 = 0b11'011'1001'1101'010, + PMXEVTYPER_EL0 = 0b11'011'1001'1101'001, + SP_EL0 = 0b11'000'0100'0001'000, + SPSel = 0b11'000'0100'0010'000, + SPSR_abt = 0b11'100'0100'0011'001, + SPSR_fiq = 0b11'100'0100'0011'011, + SPSR_irq = 0b11'100'0100'0011'000, + SPSR_und = 0b11'100'0100'0011'010, TPIDR_EL0 = 0b11'011'1101'0000'010, TPIDRRO_EL0 = 0b11'011'1101'0000'011, + UAO = 0b11'000'0100'0010'100, }; enum class AtOp { @@ -199,7 +251,7 @@ enum class TlbiOp { VALE1 = 0b000'0111'101, VAALE1 = 0b000'0111'111, IPAS2E1IS = 0b100'0000'001, - RIPAS2E1IS 
= 0b100'0000'010, // ARMv8.4-TLBI + RIPAS2E1IS = 0b100'0000'010, // ARMv8.4-TLBI IPAS2LE1IS = 0b100'0000'101, RIPAS2LE1IS = 0b100'0000'110, // ARMv8.4-TLBI ALLE2OS = 0b100'0001'000, // ARMv8.4-TLBI @@ -214,11 +266,11 @@ enum class TlbiOp { ALLE1IS = 0b100'0011'100, VALE2IS = 0b100'0011'101, VMALLS12E1IS = 0b100'0011'110, - IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI + IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI IPAS2E1 = 0b100'0100'001, - RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI - RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI - IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI + RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI + RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI + IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI IPAS2LE1 = 0b100'0100'101, RIPAS2LE1 = 0b100'0100'110, // ARMv8.4-TLBI RIPAS2LE1OS = 0b100'0100'111, // ARMv8.4-TLBI diff --git a/include/oaknut/impl/imm.hpp b/include/oaknut/impl/imm.hpp index cc90832c..7cde26fe 100644 --- a/include/oaknut/impl/imm.hpp +++ b/include/oaknut/impl/imm.hpp @@ -60,9 +60,9 @@ public: constexpr /* implicit */ AddSubImm(std::uint64_t value_) { if ((value_ & 0xFFF) == value_) { - m_encoded = value_; + m_encoded = static_cast(value_); } else if ((value_ & 0xFFF000) == value_) { - m_encoded = (value_ >> 12) | (1 << 12); + m_encoded = static_cast((value_ >> 12) | (1 << 12)); } else { throw OaknutException{ExceptionType::InvalidAddSubImm}; } @@ -126,18 +126,18 @@ constexpr std::optional encode_bit_imm(std::uint64_t value) if (value == 0 || (~value) == 0) return std::nullopt; - const std::size_t rotation = std::countr_zero(value & (value + 1)); + const int rotation = std::countr_zero(value & (value + 1)); const std::uint64_t rot_value = std::rotr(value, rotation); - const std::size_t esize = std::countr_zero(rot_value & (rot_value + 1)); - const std::size_t ones = std::countr_one(rot_value); + const int esize = std::countr_zero(rot_value & (rot_value + 1)); + const int ones = std::countr_one(rot_value); if (std::rotr(value, esize) != 
value) return std::nullopt; - const std::uint32_t S = ((-esize) << 1) | (ones - 1); - const std::uint32_t R = (esize - rotation) & (esize - 1); - const std::uint32_t N = (~S >> 6) & 1; + const int S = ((-esize) << 1) | (ones - 1); + const int R = (esize - rotation) & (esize - 1); + const int N = (~S >> 6) & 1; return static_cast((S & 0b111111) | (R << 6) | (N << 12)); } diff --git a/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp b/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp index 4f5ca8f0..09e8665f 100644 --- a/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp +++ b/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp @@ -167,13 +167,13 @@ void BFI(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width) { if (width.value() == 0 || width.value() > (32 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"0011001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1); + emit<"0011001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1); } void BFI(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width) { if (width.value() == 0 || width.value() > (64 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"1011001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1); + emit<"1011001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (~lsb.value() + 1) & 63, width.value() - 1); } void BFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms) { @@ -1231,13 +1231,13 @@ void SBFIZ(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width) { if (width.value() == 0 || width.value() > (32 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"0001001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1); + emit<"0001001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1); } void SBFIZ(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width) 
{ if (width.value() == 0 || width.value() > (64 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"1001001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1); + emit<"1001001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (~lsb.value() + 1) & 63, width.value() - 1); } void SBFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms) { @@ -1627,13 +1627,13 @@ void UBFIZ(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width) { if (width.value() == 0 || width.value() > (32 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"0101001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1); + emit<"0101001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1); } void UBFIZ(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width) { if (width.value() == 0 || width.value() > (64 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"1101001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1); + emit<"1101001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (~lsb.value() + 1) & 63, width.value() - 1); } void UBFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms) { diff --git a/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp b/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp index a5bc5b82..0dffd0e3 100644 --- a/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp +++ b/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp @@ -5,13 +5,13 @@ void BFC(WReg wd, Imm<5> lsb, Imm<5> width) { if (width.value() == 0 || width.value() > (32 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (-lsb.value()) & 31, width.value() - 1); + emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (~lsb.value() + 1) & 31, width.value() - 1); } void BFC(XReg xd, Imm<6> lsb, Imm<6> width) { if 
(width.value() == 0 || width.value() > (64 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (-lsb.value()) & 63, width.value() - 1); + emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (~lsb.value() + 1) & 63, width.value() - 1); } void ESB() { diff --git a/include/oaknut/impl/oaknut_exception.inc.hpp b/include/oaknut/impl/oaknut_exception.inc.hpp index 07402362..fc2738f0 100644 --- a/include/oaknut/impl/oaknut_exception.inc.hpp +++ b/include/oaknut/impl/oaknut_exception.inc.hpp @@ -29,6 +29,7 @@ OAKNUT_EXCEPTION(ImmOutOfRange, "outsized Imm value") OAKNUT_EXCEPTION(InvalidAddSubExt, "invalid AddSubExt choice for rm size") OAKNUT_EXCEPTION(InvalidIndexExt, "invalid IndexExt choice for rm size") OAKNUT_EXCEPTION(BitPositionOutOfRange, "bit position exceeds size of rt") +OAKNUT_EXCEPTION(RequiresAbsoluteAddressesContext, "absolute addresses required") // mnemonics_*.inc.hpp OAKNUT_EXCEPTION(InvalidCombination, "InvalidCombination") diff --git a/include/oaknut/impl/offset.hpp b/include/oaknut/impl/offset.hpp index 47859c78..a70941ff 100644 --- a/include/oaknut/impl/offset.hpp +++ b/include/oaknut/impl/offset.hpp @@ -45,7 +45,7 @@ struct AddrOffset { : m_payload(&label) {} - AddrOffset(void* ptr) + AddrOffset(const void* ptr) : m_payload(ptr) {} @@ -63,7 +63,7 @@ struct AddrOffset { private: template friend class BasicCodeGenerator; - std::variant m_payload; + std::variant m_payload; }; template @@ -78,13 +78,19 @@ struct PageOffset { static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target) { - std::uint64_t diff = (static_cast(target) >> shift_amount) - (static_cast(current_addr) >> shift_amount); + std::uint64_t diff = static_cast((static_cast(target) >> shift_amount) - (static_cast(current_addr) >> shift_amount)); if (detail::sign_extend(diff) != diff) throw OaknutException{ExceptionType::OffsetOutOfRange}; diff &= detail::mask_from_size(bitsize); 
return static_cast(((diff & 3) << (bitsize - 2)) | (diff >> 2)); } + static bool valid(std::uintptr_t current_addr, std::uintptr_t target) + { + std::uint64_t diff = static_cast((static_cast(target) >> shift_amount) - (static_cast(current_addr) >> shift_amount)); + return detail::sign_extend(diff) == diff; + } + private: template friend class BasicCodeGenerator; diff --git a/include/oaknut/impl/overloaded.hpp b/include/oaknut/impl/overloaded.hpp new file mode 100644 index 00000000..b15b8392 --- /dev/null +++ b/include/oaknut/impl/overloaded.hpp @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +namespace oaknut::detail { + +template +struct overloaded : Ts... { + using Ts::operator()...; +}; + +template +overloaded(Ts...) -> overloaded; + +} // namespace oaknut::detail diff --git a/include/oaknut/impl/reg.hpp b/include/oaknut/impl/reg.hpp index eab02d84..649e67b3 100644 --- a/include/oaknut/impl/reg.hpp +++ b/include/oaknut/impl/reg.hpp @@ -52,8 +52,8 @@ struct DElem; struct Reg { constexpr explicit Reg(bool is_vector_, unsigned bitsize_, int index_) - : m_index(index_) - , m_bitsize(bitsize_) + : m_index(static_cast(index_)) + , m_bitsize(static_cast(bitsize_)) , m_is_vector(is_vector_) { assert(index_ >= -1 && index_ <= 31); @@ -65,8 +65,8 @@ struct Reg { constexpr bool is_vector() const { return m_is_vector; } private: - int m_index : 8; - unsigned m_bitsize : 8; + std::int8_t m_index; + std::uint8_t m_bitsize; bool m_is_vector; }; @@ -190,7 +190,7 @@ struct VReg : public Reg { struct VRegArranged : public Reg { protected: constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_) - : Reg(true, bitsize_, index_), m_esize(esize_) + : Reg(true, bitsize_, index_), m_esize(static_cast(esize_)) { assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two"); assert(esize_ <= bitsize_); @@ -200,7 +200,7 @@ protected: friend class BasicCodeGenerator; 
private: - int m_esize : 8; + std::uint8_t m_esize; }; struct VReg_2H : public VRegArranged { diff --git a/include/oaknut/impl/string_literal.hpp b/include/oaknut/impl/string_literal.hpp index e09dfa65..412203e9 100644 --- a/include/oaknut/impl/string_literal.hpp +++ b/include/oaknut/impl/string_literal.hpp @@ -21,4 +21,22 @@ struct StringLiteral { char value[N]; }; +namespace detail { + +template haystack, StringLiteral needles> +consteval std::uint32_t find() +{ + std::uint32_t result = 0; + for (std::size_t i = 0; i < 32; i++) { + for (std::size_t a = 0; a < needles.strlen; a++) { + if (haystack.value[i] == needles.value[a]) { + result |= 1 << (31 - i); + } + } + } + return result; +} + +} // namespace detail + } // namespace oaknut diff --git a/include/oaknut/oaknut.hpp b/include/oaknut/oaknut.hpp index b67f7fdd..aa80f81b 100644 --- a/include/oaknut/oaknut.hpp +++ b/include/oaknut/oaknut.hpp @@ -17,84 +17,69 @@ #include "oaknut/impl/list.hpp" #include "oaknut/impl/multi_typed_name.hpp" #include "oaknut/impl/offset.hpp" +#include "oaknut/impl/overloaded.hpp" #include "oaknut/impl/reg.hpp" #include "oaknut/impl/string_literal.hpp" #include "oaknut/oaknut_exception.hpp" namespace oaknut { -namespace detail { - -template -constexpr std::uint32_t get_bits() -{ - std::uint32_t result = 0; - for (std::size_t i = 0; i < 32; i++) { - for (std::size_t a = 0; a < barg.strlen; a++) { - if (bs.value[i] == barg.value[a]) { - result |= 1 << (31 - i); - } - } - } - return result; -} - -template -struct overloaded : Ts... { - using Ts::operator()...; -}; - -template -overloaded(Ts...) 
-> overloaded; - -} // namespace detail - struct Label { public: Label() = default; + bool is_bound() const + { + return m_offset.has_value(); + } + + std::ptrdiff_t offset() const + { + return m_offset.value(); + } + private: template friend class BasicCodeGenerator; - explicit Label(std::uintptr_t addr) - : m_addr(addr) + explicit Label(std::ptrdiff_t offset) + : m_offset(offset) {} - using EmitFunctionType = std::uint32_t (*)(std::uintptr_t wb_addr, std::uintptr_t resolved_addr); + using EmitFunctionType = std::uint32_t (*)(std::ptrdiff_t wb_offset, std::ptrdiff_t resolved_offset); struct Writeback { - std::uintptr_t m_wb_addr; + std::ptrdiff_t m_wb_offset; std::uint32_t m_mask; EmitFunctionType m_fn; }; - std::optional m_addr; + std::optional m_offset; std::vector m_wbs; }; template class BasicCodeGenerator : public Policy { public: - BasicCodeGenerator(typename Policy::constructor_argument_type arg) - : Policy(arg) + BasicCodeGenerator(typename Policy::constructor_argument_type arg, std::uint32_t* xmem) + : Policy(arg, xmem) {} - Label l() + Label l() const { - return Label{Policy::current_address()}; + return Label{Policy::offset()}; } - void l(Label& label) + void l(Label& label) const { - if (label.m_addr) + if (label.is_bound()) throw OaknutException{ExceptionType::LabelRedefinition}; - const auto target_addr = Policy::current_address(); - label.m_addr = target_addr; + const auto target_offset = Policy::offset(); + label.m_offset = target_offset; for (auto& wb : label.m_wbs) { - const std::uint32_t value = wb.m_fn(wb.m_wb_addr, target_addr); - Policy::set_at_address(wb.m_wb_addr, value, wb.m_mask); + const std::uint32_t value = wb.m_fn(wb.m_wb_offset, target_offset); + Policy::set_at_offset(wb.m_wb_offset, value, wb.m_mask); } label.m_wbs.clear(); } @@ -123,8 +108,8 @@ public: return; if (MovImm16::is_valid(imm)) return MOVZ(wd, imm); - if (MovImm16::is_valid(~imm)) - return MOVN(wd, ~imm); + if (MovImm16::is_valid(static_cast(~imm))) + return MOVN(wd, 
static_cast(~imm)); if (detail::encode_bit_imm(imm)) return ORR(wd, WzrReg{}, imm); @@ -173,10 +158,10 @@ public: // Convenience function for moving pointers to registers void MOVP2R(XReg xd, const void* addr) { - int64_t diff = reinterpret_cast(addr) - Policy::current_address(); + const int64_t diff = reinterpret_cast(addr) - Policy::template xptr(); if (diff >= -0xF'FFFF && diff <= 0xF'FFFF) { ADR(xd, addr); - } else if (diff >= -int64_t{0xFFFF'FFFF} && diff <= int64_t{0xFFFF'FFFF}) { + } else if (PageOffset<21, 12>::valid(Policy::template xptr(), reinterpret_cast(addr))) { ADRL(xd, addr); } else { MOV(xd, reinterpret_cast(addr)); @@ -188,7 +173,7 @@ public: if (alignment < 4 || (alignment & (alignment - 1)) != 0) throw OaknutException{ExceptionType::InvalidAlignment}; - while (Policy::template ptr() & (alignment - 1)) { + while (Policy::offset() & (alignment - 1)) { NOP(); } } @@ -210,85 +195,55 @@ private: template void emit(Ts... args) { - std::uint32_t encoding = detail::get_bits(); - encoding |= (0 | ... | encode()>(std::forward(args))); + constexpr std::uint32_t base = detail::find(); + std::uint32_t encoding = (base | ... 
| encode()>(std::forward(args))); Policy::append(encoding); } - - template - std::uint32_t encode(AddrOffset v) - { - static_assert(std::popcount(splat) == size - align); - - const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) { - const std::ptrdiff_t diff = target - current_addr; - return pdep(AddrOffset::encode(diff)); - }; - - return std::visit(detail::overloaded{ - [&](std::uint32_t encoding) { - return pdep(encoding); - }, - [&](Label* label) { - if (label->m_addr) { - return encode_fn(Policy::current_address(), *label->m_addr); - } - - label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast(encode_fn)}); - return 0u; - }, - [&](void* p) { - return encode_fn(Policy::current_address(), reinterpret_cast(p)); - }, - }, - v.m_payload); - } - - template - std::uint32_t encode(PageOffset v) - { - static_assert(std::popcount(splat) == size); - - const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) { - return pdep(PageOffset::encode(current_addr, target)); - }; - - return std::visit(detail::overloaded{ - [&](Label* label) { - if (label->m_addr) { - return encode_fn(Policy::current_address(), *label->m_addr); - } - - label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast(encode_fn)}); - return 0u; - }, - [&](const void* p) { - return encode_fn(Policy::current_address(), reinterpret_cast(p)); - }, - }, - v.m_payload); - } }; struct PointerCodeGeneratorPolicy { public: + std::ptrdiff_t offset() const + { + return (m_ptr - m_wmem) * sizeof(std::uint32_t); + } + + void set_offset(std::ptrdiff_t offset) + { + if ((offset % sizeof(std::uint32_t)) != 0) + throw OaknutException{ExceptionType::InvalidAlignment}; + m_ptr = m_wmem + offset / sizeof(std::uint32_t); + } + template - T ptr() + T wptr() const { static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); return reinterpret_cast(m_ptr); } - void set_ptr(std::uint32_t* ptr_) + template + T 
xptr() const { - m_ptr = ptr_; + static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); + return reinterpret_cast(m_xmem + (m_ptr - m_wmem)); + } + + void set_wptr(std::uint32_t* p) + { + m_ptr = p; + } + + void set_xptr(std::uint32_t* p) + { + m_ptr = m_wmem + (p - m_xmem); } protected: using constructor_argument_type = std::uint32_t*; - PointerCodeGeneratorPolicy(std::uint32_t* ptr_) - : m_ptr(ptr_) + PointerCodeGeneratorPolicy(std::uint32_t* wmem, std::uint32_t* xmem) + : m_ptr(wmem), m_wmem(wmem), m_xmem(xmem) {} void append(std::uint32_t instruction) @@ -296,22 +251,57 @@ protected: *m_ptr++ = instruction; } - std::uintptr_t current_address() + void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const { - return reinterpret_cast(m_ptr); - } - - void set_at_address(std::uintptr_t addr, std::uint32_t value, std::uint32_t mask) - { - std::uint32_t* p = reinterpret_cast(addr); + std::uint32_t* p = m_wmem + offset / sizeof(std::uint32_t); *p = (*p & mask) | value; } private: std::uint32_t* m_ptr; + std::uint32_t* const m_wmem; + std::uint32_t* const m_xmem; +}; + +struct VectorCodeGeneratorPolicy { +public: + std::ptrdiff_t offset() const + { + return m_vec.size() * sizeof(std::uint32_t); + } + + template + T xptr() const + { + static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); + return reinterpret_cast(m_xmem + m_vec.size()); + } + +protected: + using constructor_argument_type = std::vector&; + + VectorCodeGeneratorPolicy(std::vector& vec, std::uint32_t* xmem) + : m_vec(vec), m_xmem(xmem) + {} + + void append(std::uint32_t instruction) + { + m_vec.push_back(instruction); + } + + void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const + { + std::uint32_t& p = m_vec[offset / sizeof(std::uint32_t)]; + p = (p & mask) | value; + } + +private: + std::vector& m_vec; + std::uint32_t* const m_xmem; }; using CodeGenerator = BasicCodeGenerator; +using VectorCodeGenerator = 
BasicCodeGenerator; namespace util { diff --git a/oaknutConfig.cmake.in b/oaknutConfig.cmake.in new file mode 100644 index 00000000..8c9ad12a --- /dev/null +++ b/oaknutConfig.cmake.in @@ -0,0 +1,5 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") + +check_required_components(@PROJECT_NAME@) diff --git a/tests/_feature_detect.cpp b/tests/_feature_detect.cpp new file mode 100644 index 00000000..875abac4 --- /dev/null +++ b/tests/_feature_detect.cpp @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime +// SPDX-License-Identifier: MIT + +#include + +#include + +#include "oaknut/feature_detection/feature_detection.hpp" +#include "oaknut/feature_detection/feature_detection_idregs.hpp" + +using namespace oaknut; + +TEST_CASE("Print CPU features (Default)") +{ + CpuFeatures features = detect_features(); + + std::fputs("CPU Features: ", stdout); + +#define OAKNUT_CPU_FEATURE(name) \ + if (features.has(CpuFeature::name)) \ + std::fputs(#name " ", stdout); +#include "oaknut/impl/cpu_feature.inc.hpp" +#undef OAKNUT_CPU_FEATURE + + std::fputs("\n", stdout); +} + +#if OAKNUT_SUPPORTS_READING_ID_REGISTERS == 1 + +TEST_CASE("Print CPU features (Using CPUID)") +{ + std::optional id_regs = read_id_registers(); + REQUIRE(!!id_regs); + + CpuFeatures features = detect_features_via_id_registers(*id_regs); + + std::fputs("CPU Features (CPUID method): ", stdout); + +# define OAKNUT_CPU_FEATURE(name) \ + if (features.has(CpuFeature::name)) \ + std::fputs(#name " ", stdout); +# include "oaknut/impl/cpu_feature.inc.hpp" +# undef OAKNUT_CPU_FEATURE + + std::fputs("\n", stdout); +} + +#elif OAKNUT_SUPPORTS_READING_ID_REGISTERS == 2 + +TEST_CASE("Print CPU features (Using CPUID)") +{ + const std::size_t core_count = get_core_count(); + for (std::size_t core_index = 0; core_index < core_count; core_index++) { + std::optional id_regs = read_id_registers(core_index); + REQUIRE(!!id_regs); + + CpuFeatures features = 
detect_features_via_id_registers(*id_regs); + + std::printf("CPU Features (CPUID method - Core %zu): ", core_index); + +# define OAKNUT_CPU_FEATURE(name) \ + if (features.has(CpuFeature::name)) \ + std::fputs(#name " ", stdout); +# include "oaknut/impl/cpu_feature.inc.hpp" +# undef OAKNUT_CPU_FEATURE + + std::fputs("\n", stdout); + } +} + +#endif diff --git a/tests/basic.cpp b/tests/basic.cpp index 7b401f60..38342ca4 100644 --- a/tests/basic.cpp +++ b/tests/basic.cpp @@ -8,6 +8,7 @@ #include #include "oaknut/code_block.hpp" +#include "oaknut/dual_code_block.hpp" #include "oaknut/oaknut.hpp" #include "rand_int.hpp" @@ -17,7 +18,7 @@ using namespace oaknut::util; TEST_CASE("Basic Test") { CodeBlock mem{4096}; - CodeGenerator code{mem.ptr()}; + CodeGenerator code{mem.ptr(), mem.ptr()}; mem.unprotect(); @@ -31,14 +32,28 @@ TEST_CASE("Basic Test") REQUIRE(result == 42); } +TEST_CASE("Basic Test (Dual)") +{ + DualCodeBlock mem{4096}; + CodeGenerator code{mem.wptr(), mem.xptr()}; + + code.MOV(W0, 42); + code.RET(); + + mem.invalidate_all(); + + int result = ((int (*)())mem.xptr())(); + REQUIRE(result == 42); +} + TEST_CASE("Fibonacci") { CodeBlock mem{4096}; - CodeGenerator code{mem.ptr()}; + CodeGenerator code{mem.ptr(), mem.ptr()}; mem.unprotect(); - auto fib = code.ptr(); + auto fib = code.xptr(); Label start, end, zero, recurse; code.l(start); @@ -77,16 +92,59 @@ TEST_CASE("Fibonacci") REQUIRE(fib(9) == 34); } -TEST_CASE("Immediate generation (32-bit)") +TEST_CASE("Fibonacci (Dual)") +{ + DualCodeBlock mem{4096}; + CodeGenerator code{mem.wptr(), mem.xptr()}; + + auto fib = code.xptr(); + Label start, end, zero, recurse; + + code.l(start); + code.STP(X29, X30, SP, PRE_INDEXED, -32); + code.STP(X20, X19, SP, 16); + code.MOV(X29, SP); + code.MOV(W19, W0); + code.SUBS(W0, W0, 1); + code.B(LT, zero); + code.B(NE, recurse); + code.MOV(W0, 1); + code.B(end); + + code.l(zero); + code.MOV(W0, WZR); + code.B(end); + + code.l(recurse); + code.BL(start); + code.MOV(W20, W0); + 
code.SUB(W0, W19, 2); + code.BL(start); + code.ADD(W0, W0, W20); + + code.l(end); + code.LDP(X20, X19, SP, 16); + code.LDP(X29, X30, SP, POST_INDEXED, 32); + code.RET(); + + mem.invalidate_all(); + + REQUIRE(fib(0) == 0); + REQUIRE(fib(1) == 1); + REQUIRE(fib(5) == 5); + REQUIRE(fib(9) == 34); +} + +TEST_CASE("Immediate generation (32-bit)", "[slow]") { CodeBlock mem{4096}; for (int i = 0; i < 0x100000; i++) { const std::uint32_t value = RandInt(0, 0xffffffff); - CodeGenerator code{mem.ptr()}; + CodeGenerator code{mem.ptr(), mem.ptr()}; - auto f = code.ptr(); + auto f = code.xptr(); mem.unprotect(); code.MOV(W0, value); code.RET(); @@ -97,16 +155,16 @@ TEST_CASE("Immediate generation (32-bit)") } } -TEST_CASE("Immediate generation (64-bit)") +TEST_CASE("Immediate generation (64-bit)", "[slow]") { CodeBlock mem{4096}; for (int i = 0; i < 0x100000; i++) { const std::uint64_t value = RandInt(0, 0xffffffff'ffffffff); - CodeGenerator code{mem.ptr()}; + CodeGenerator code{mem.ptr(), mem.ptr()}; - auto f = code.ptr(); + auto f = code.xptr(); mem.unprotect(); code.MOV(X0, value); code.RET(); @@ -117,16 +175,16 @@ TEST_CASE("Immediate generation (64-bit)") } } -TEST_CASE("ADR") +TEST_CASE("ADR", "[slow]") { CodeBlock mem{4096}; for (std::int64_t i = -1048576; i < 1048576; i++) { const std::intptr_t value = reinterpret_cast(mem.ptr()) + i; - CodeGenerator code{mem.ptr()}; + CodeGenerator code{mem.ptr(), mem.ptr()}; - auto f = code.ptr(); + auto f = code.xptr(); mem.unprotect(); code.ADR(X0, reinterpret_cast(value)); code.RET(); @@ -138,7 +196,20 @@ TEST_CASE("ADR") } } -TEST_CASE("ADRP") +TEST_CASE("PageOffset (rollover)") +{ + REQUIRE(PageOffset<21, 12>::encode(0x0000000088e74000, 0xffffffffd167dece) == 0xd2202); +} + +TEST_CASE("PageOffset (page boundary)") +{ + REQUIRE(PageOffset<21, 12>::encode(0x0001000000000002, 0x0001000000000001) == 0); + REQUIRE(PageOffset<21, 12>::encode(0x0001000000000001, 0x0001000000000002) == 0); + REQUIRE(PageOffset<21, 
12>::encode(0x0001000000001000, 0x0001000000000fff) == 0x1fffff); + REQUIRE(PageOffset<21, 12>::encode(0x0001000000000fff, 0x0001000000001000) == 0x080000); +} + +TEST_CASE("ADRP", "[slow]") { CodeBlock mem{4096}; @@ -147,9 +218,9 @@ TEST_CASE("ADRP") const std::intptr_t value = reinterpret_cast(mem.ptr()) + diff; const std::uint64_t expect = static_cast(value) & ~static_cast(0xfff); - CodeGenerator code{mem.ptr()}; + CodeGenerator code{mem.ptr(), mem.ptr()}; - auto f = code.ptr(); + auto f = code.xptr(); mem.unprotect(); code.ADRP(X0, reinterpret_cast(value)); code.RET(); @@ -161,17 +232,18 @@ TEST_CASE("ADRP") } } -TEST_CASE("ADRL") +TEST_CASE("ADRL (near)") { CodeBlock mem{4096}; + std::uint32_t* const mem_ptr = mem.ptr() + 42; // create small offset for testing - for (int i = 0; i < 0x200000; i++) { - const std::int64_t diff = RandInt(-4294967296, 4294967295); - const std::intptr_t value = reinterpret_cast(mem.ptr()) + diff; + for (int i = -0x4000; i < 0x4000; i++) { + const std::int64_t diff = i; + const std::intptr_t value = reinterpret_cast(mem_ptr) + diff; - CodeGenerator code{mem.ptr()}; + CodeGenerator code{mem_ptr, mem_ptr}; - auto f = code.ptr(); + auto f = code.xptr(); mem.unprotect(); code.ADRL(X0, reinterpret_cast(value)); code.RET(); @@ -183,18 +255,42 @@ TEST_CASE("ADRL") } } -TEST_CASE("MOVP2R") +TEST_CASE("ADRL (far)", "[slow]") { CodeBlock mem{4096}; + std::uint32_t* const mem_ptr = mem.ptr() + 42; // create small offset for testing - for (int i = 0; i < 0x200'0000; i++) { + for (int i = 0; i < 0x200000; i++) { + const std::int64_t diff = RandInt(-4294967296 + 100, 4294967295 - 100); + const std::intptr_t value = reinterpret_cast(mem_ptr) + diff; + + CodeGenerator code{mem_ptr, mem_ptr}; + + auto f = code.xptr(); + mem.unprotect(); + code.ADRL(X0, reinterpret_cast(value)); + code.RET(); + mem.protect(); + mem.invalidate_all(); + + INFO(i); + REQUIRE(f() == static_cast(value)); + } +} + +TEST_CASE("MOVP2R (far)", "[slow]") +{ + CodeBlock 
mem{4096}; + std::uint32_t* const mem_ptr = mem.ptr() + 42; // create small offset for testing + + for (int i = 0; i < 0x200000; i++) { const std::int64_t diff = RandInt(std::numeric_limits::min(), std::numeric_limits::max()); - const std::intptr_t value = reinterpret_cast(mem.ptr()) + diff; + const std::intptr_t value = reinterpret_cast(mem_ptr) + diff; - CodeGenerator code{mem.ptr()}; + CodeGenerator code{mem_ptr, mem_ptr}; - auto f = code.ptr(); + auto f = code.xptr(); mem.unprotect(); code.MOVP2R(X0, reinterpret_cast(value)); code.RET(); @@ -204,3 +300,28 @@ TEST_CASE("MOVP2R") REQUIRE(f() == static_cast(value)); } } + +TEST_CASE("MOVP2R (4GiB boundary)") +{ + CodeBlock mem{4096}; + std::uint32_t* const mem_ptr = mem.ptr() + 42; // create small offset for testing + + for (std::int64_t i = 0xFFFF'F000; i < 0x1'0000'1000; i++) { + const auto test = [&](std::int64_t diff) { + const std::intptr_t value = reinterpret_cast(mem_ptr) + diff; + + CodeGenerator code{mem_ptr, mem_ptr}; + + auto f = code.xptr(); + mem.unprotect(); + code.MOVP2R(X0, reinterpret_cast(value)); + code.RET(); + mem.protect(); + mem.invalidate_all(); + + REQUIRE(f() == static_cast(value)); + }; + test(i); + test(-i); + } +} diff --git a/tests/fpsimd.cpp b/tests/fpsimd.cpp index e0cb0e26..d164f8e6 100644 --- a/tests/fpsimd.cpp +++ b/tests/fpsimd.cpp @@ -8,18 +8,18 @@ #include "oaknut/oaknut.hpp" -#define T(HEX, CMD) \ - TEST_CASE(#CMD) \ - { \ - using namespace oaknut; \ - using namespace oaknut::util; \ - \ - std::uint32_t result; \ - CodeGenerator code{&result}; \ - \ - code.CMD; \ - \ - REQUIRE(result == HEX); \ +#define T(HEX, CMD) \ + TEST_CASE(#CMD) \ + { \ + using namespace oaknut; \ + using namespace oaknut::util; \ + \ + std::uint32_t result; \ + CodeGenerator code{&result, &result}; \ + \ + code.CMD; \ + \ + REQUIRE(result == HEX); \ } T(0x5ee0bb61, ABS(D1, D27)) diff --git a/tests/general.cpp b/tests/general.cpp index 2caf4652..0acb35f3 100644 --- a/tests/general.cpp +++ 
b/tests/general.cpp @@ -8,18 +8,18 @@ #include "oaknut/oaknut.hpp" -#define T(HEX, CMD) \ - TEST_CASE(#CMD) \ - { \ - using namespace oaknut; \ - using namespace oaknut::util; \ - \ - std::uint32_t result; \ - CodeGenerator code{&result}; \ - \ - code.CMD; \ - \ - REQUIRE(result == HEX); \ +#define T(HEX, CMD) \ + TEST_CASE(#CMD) \ + { \ + using namespace oaknut; \ + using namespace oaknut::util; \ + \ + std::uint32_t result; \ + CodeGenerator code{&result, &result}; \ + \ + code.CMD; \ + \ + REQUIRE(result == HEX); \ } T(0x1a0f01c3, ADC(W3, W14, W15)) diff --git a/tests/vector_code_gen.cpp b/tests/vector_code_gen.cpp new file mode 100644 index 00000000..e06b135c --- /dev/null +++ b/tests/vector_code_gen.cpp @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include + +#include + +#include "oaknut/code_block.hpp" +#include "oaknut/oaknut.hpp" +#include "rand_int.hpp" + +using namespace oaknut; +using namespace oaknut::util; + +TEST_CASE("Basic Test (VectorCodeGenerator)") +{ + CodeBlock mem{4096}; + std::vector vec; + VectorCodeGenerator code{vec, mem.ptr()}; + + code.MOV(W0, 42); + code.RET(); + + mem.unprotect(); + std::memcpy(mem.ptr(), vec.data(), vec.size() * sizeof(std::uint32_t)); + mem.protect(); + mem.invalidate_all(); + + int result = ((int (*)())mem.ptr())(); + REQUIRE(result == 42); +} + +TEST_CASE("Fibonacci (VectorCodeGenerator)") +{ + CodeBlock mem{4096}; + std::vector vec; + VectorCodeGenerator code{vec, mem.ptr()}; + + Label start, end, zero, recurse; + + code.l(start); + code.STP(X29, X30, SP, PRE_INDEXED, -32); + code.STP(X20, X19, SP, 16); + code.MOV(X29, SP); + code.MOV(W19, W0); + code.SUBS(W0, W0, 1); + code.B(LT, zero); + code.B(NE, recurse); + code.MOV(W0, 1); + code.B(end); + + code.l(zero); + code.MOV(W0, WZR); + code.B(end); + + code.l(recurse); + code.BL(start); + code.MOV(W20, W0); + code.SUB(W0, W19, 2); + code.BL(start); + code.ADD(W0, 
W0, W20); + + code.l(end); + code.LDP(X20, X19, SP, 16); + code.LDP(X29, X30, SP, POST_INDEXED, 32); + code.RET(); + + mem.unprotect(); + std::memcpy(mem.ptr(), vec.data(), vec.size() * sizeof(std::uint32_t)); + mem.protect(); + mem.invalidate_all(); + + auto fib = (int (*)(int))mem.ptr(); + + REQUIRE(fib(0) == 0); + REQUIRE(fib(1) == 1); + REQUIRE(fib(5) == 5); + REQUIRE(fib(9) == 34); +}