From e4a733d5b2e02e7095847892f29ed8288d55d91e Mon Sep 17 00:00:00 2001
From: Merry
Date: Sat, 2 Mar 2024 19:36:24 +0000
Subject: [PATCH] Squashed 'externals/biscuit/' content from commit 1d53c9e6

git-subtree-dir: externals/biscuit
git-subtree-split: 1d53c9e6cfe56544982200a5bac15a5de064936b
---
 .github/workflows/build-and-test.yml | 45 +
 .gitignore | 3 +
 CMakeLists.txt | 17 +
 LICENSE.md | 12 +
 README.md | 157 +
 clang-format | 88 +
 cmake/biscuit-config.cmake.in | 5 +
 examples/CMakeLists.txt | 1 +
 examples/cpuinfo/CMakeLists.txt | 3 +
 examples/cpuinfo/cpuinfo.cpp | 31 +
 include/biscuit/assembler.hpp | 1475 ++
 include/biscuit/assert.hpp | 14 +
 include/biscuit/code_buffer.hpp | 211 +
 include/biscuit/cpuinfo.hpp | 101 +
 include/biscuit/csr.hpp | 443 +
 include/biscuit/isa.hpp | 49 +
 include/biscuit/label.hpp | 173 +
 include/biscuit/registers.hpp | 315 +
 include/biscuit/vector.hpp | 88 +
 src/CMakeLists.txt | 156 +
 src/assembler.cpp | 1401 ++
 src/assembler_compressed.cpp | 696 +
 src/assembler_crypto.cpp | 172 +
 src/assembler_floating_point.cpp | 648 +
 src/assembler_util.hpp | 224 +
 src/assembler_vector.cpp | 2146 +++
 src/code_buffer.cpp | 111 +
 src/cpuinfo.cpp | 39 +
 tests/CMakeLists.txt | 76 +
 tests/externals/catch/catch.hpp | 17976 ++++++++++++++++++
 tests/src/assembler_bfloat_tests.cpp | 95 +
 tests/src/assembler_branch_tests.cpp | 105 +
 tests/src/assembler_cmo_tests.cpp | 113 +
 tests/src/assembler_privileged_tests.cpp | 302 +
 tests/src/assembler_rv32i_tests.cpp | 769 +
 tests/src/assembler_rv64i_tests.cpp | 436 +
 tests/src/assembler_rva_tests.cpp | 513 +
 tests/src/assembler_rvb_tests.cpp | 610 +
 tests/src/assembler_rvc_tests.cpp | 595 +
 tests/src/assembler_rvd_tests.cpp | 528 +
 tests/src/assembler_rvf_tests.cpp | 1085 ++
 tests/src/assembler_rvk_tests.cpp | 384 +
 tests/src/assembler_rvm_tests.cpp | 241 +
 tests/src/assembler_rvq_tests.cpp | 538 +
 tests/src/assembler_rvv_tests.cpp | 5334 ++++++
 tests/src/assembler_test_utils.hpp | 23 +
 tests/src/assembler_vector_crypto_tests.cpp | 495 +
 tests/src/assembler_zacas_tests.cpp | 76 +
 tests/src/assembler_zawrs_tests.cpp | 23 +
 tests/src/assembler_zc_tests.cpp | 457 +
 tests/src/assembler_zfa_tests.cpp | 414 +
 tests/src/assembler_zicond_tests.cpp | 33 +
 tests/src/assembler_zicsr_tests.cpp | 130 +
 tests/src/assembler_zihintntl_tests.cpp | 71 +
 tests/src/main.cpp | 2 +
 55 files changed, 40248 insertions(+)
 create mode 100644 .github/workflows/build-and-test.yml
 create mode 100644 .gitignore
 create mode 100644 CMakeLists.txt
 create mode 100644 LICENSE.md
 create mode 100644 README.md
 create mode 100644 clang-format
 create mode 100644 cmake/biscuit-config.cmake.in
 create mode 100644 examples/CMakeLists.txt
 create mode 100644 examples/cpuinfo/CMakeLists.txt
 create mode 100644 examples/cpuinfo/cpuinfo.cpp
 create mode 100644 include/biscuit/assembler.hpp
 create mode 100644 include/biscuit/assert.hpp
 create mode 100644 include/biscuit/code_buffer.hpp
 create mode 100644 include/biscuit/cpuinfo.hpp
 create mode 100644 include/biscuit/csr.hpp
 create mode 100644 include/biscuit/isa.hpp
 create mode 100644 include/biscuit/label.hpp
 create mode 100644 include/biscuit/registers.hpp
 create mode 100644 include/biscuit/vector.hpp
 create mode 100644 src/CMakeLists.txt
 create mode 100644 src/assembler.cpp
 create mode 100644 src/assembler_compressed.cpp
 create mode 100644 src/assembler_crypto.cpp
 create mode 100644 src/assembler_floating_point.cpp
 create mode 100644 src/assembler_util.hpp
 create mode 100644 src/assembler_vector.cpp
 create mode 100644 src/code_buffer.cpp
 create mode 100644 src/cpuinfo.cpp
 create mode 100644 tests/CMakeLists.txt
 create mode 100644 tests/externals/catch/catch.hpp
 create mode 100644 tests/src/assembler_bfloat_tests.cpp
 create mode 100644 tests/src/assembler_branch_tests.cpp
 create mode 100644 tests/src/assembler_cmo_tests.cpp
 create mode 100644 tests/src/assembler_privileged_tests.cpp
 create mode 100644 tests/src/assembler_rv32i_tests.cpp
 create mode 100644 tests/src/assembler_rv64i_tests.cpp
 create mode 100644 tests/src/assembler_rva_tests.cpp
 create mode 100644 tests/src/assembler_rvb_tests.cpp
 create mode 100644 tests/src/assembler_rvc_tests.cpp
 create mode 100644 tests/src/assembler_rvd_tests.cpp
 create mode 100644 tests/src/assembler_rvf_tests.cpp
 create mode 100644 tests/src/assembler_rvk_tests.cpp
 create mode 100644 tests/src/assembler_rvm_tests.cpp
 create mode 100644 tests/src/assembler_rvq_tests.cpp
 create mode 100644 tests/src/assembler_rvv_tests.cpp
 create mode 100644 tests/src/assembler_test_utils.hpp
 create mode 100644 tests/src/assembler_vector_crypto_tests.cpp
 create mode 100644 tests/src/assembler_zacas_tests.cpp
 create mode 100644 tests/src/assembler_zawrs_tests.cpp
 create mode 100644 tests/src/assembler_zc_tests.cpp
 create mode 100644 tests/src/assembler_zfa_tests.cpp
 create mode 100644 tests/src/assembler_zicond_tests.cpp
 create mode 100644 tests/src/assembler_zicsr_tests.cpp
 create mode 100644 tests/src/assembler_zihintntl_tests.cpp
 create mode 100644 tests/src/main.cpp

diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
new file mode 100644
index 00000000..3fa8c40f
--- /dev/null
+++ b/.github/workflows/build-and-test.yml
@@ -0,0 +1,45 @@
+name: Build and Test
+
+on: [push, pull_request]
+
+env:
+  BUILD_TYPE: Release
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        cpu_detection: [0, 1]
+      fail-fast: false
+
+    runs-on: ${{matrix.os}}
+
+    steps:
+
+    - name: Install build dependencies
+      if: ${{matrix.os == 'ubuntu-latest'}}
+      run: sudo apt-get install llvm ninja-build
+
+    - name: Install build dependencies
+      if: ${{matrix.os == 'macos-latest'}}
+      run: |
+        brew install llvm ninja
+        echo "/usr/local/opt/llvm/bin" >> $GITHUB_PATH
+
+    - name: Checkout biscuit repo
+      uses: actions/checkout@v2
+
+    - name: Configure CMake
+      run: >
+        cmake
+        -B ${{github.workspace}}/build
+        -G Ninja
+
+    - name: Build
+      working-directory: ${{github.workspace}}/build
+      run: ninja
+
+    - name: Test
+      working-directory: ${{github.workspace}}/build
+      run: ctest --extra-verbose -C ${{env.BUILD_TYPE}}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..0e34a625
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+# Built files
+build/
+build-*/
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..503c8c48
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.15)
+project(biscuit VERSION 0.14.0)
+
+include(CTest)
+
+option(BISCUIT_CODE_BUFFER_MMAP "Use mmap for handling code buffers instead of new" OFF)
+
+# Source directories
+add_subdirectory(src)
+
+if (BUILD_TESTING)
+    add_subdirectory(tests)
+endif()
+
+if (BUILD_EXAMPLES)
+    add_subdirectory(examples)
+endif()
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 00000000..53cde664
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,12 @@
+Copyright 2021 Lioncash/Lioncache
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..db4379a4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,157 @@
+# Biscuit: RISC-V Runtime Code Generation Library
+
+*RISC it for the biscuit*
+
+## About
+
+An experimental runtime code generator for RISC-V.
+
+This allows for runtime code generation of RISC-V instructions, similar
+to how [Xbyak](https://github.com/herumi/xbyak) allows for runtime code generation of x86 instructions.
+
+
+## Implemented ISA Features
+
+Includes both 32-bit and 64-bit instructions in the following:
+
+| Feature   | Version |
+|:----------|:-------:|
+| A         | 2.1     |
+| B         | 1.0     |
+| C         | 2.0     |
+| D         | 2.2     |
+| F         | 2.2     |
+| H         | 1.0     |
+| K         | 1.0.1   |
+| M         | 2.0     |
+| N         | 1.1     |
+| Q         | 2.2     |
+| RV32I     | 2.1     |
+| RV64I     | 2.1     |
+| S         | 1.12    |
+| V         | 1.0     |
+| Sstc      | 0.5.4   |
+| Zacas     | 1.0     |
+| Zawrs     | 1.01    |
+| Zcb       | 1.0.4   |
+| Zcmp      | 1.0.4   |
+| Zcmt      | 1.0.4   |
+| Zfa       | 1.0     |
+| Zfbfmin   | 1.0 rc2 |
+| Zfh       | 1.0     |
+| Zfhmin    | 1.0     |
+| Zicbom    | 1.0     |
+| Zicbop    | 1.0     |
+| Zicboz    | 1.0     |
+| Zicond    | 1.0.1   |
+| Zicsr     | 2.0     |
+| Zifencei  | 2.0     |
+| Zihintntl | 1.0     |
+| Zvbb      | 1.0     |
+| Zvbc      | 1.0     |
+| Zvfbfmin  | 1.0 rc2 |
+| Zvfbfwma  | 1.0 rc2 |
+| Zvkn      | 1.0     |
+
+Note that usually only extensions considered ratified will be implemented,
+since non-ratified documents are considerably more likely to have
+large changes made to them, which makes maintaining instruction
+APIs a little annoying.
+
+
+## Dependencies
+
+Biscuit requires no external dependencies for its library other than the C++ standard library.
+The tests, however, use the Catch2 testing library. This is included in-tree, so there's no need
+to worry about installing it yourself if you wish to run said tests.
+
+
+## Building Biscuit
+
+1. Generate the build files for the project with CMake
+2. Hit the build button in your IDE of choice, or run the relevant console command to build for the CMake generator you've chosen.
+3. Done.
+
+
+## Running Tests
+
+1. Generate the build files for the project with CMake
+2. Build the tests
+3. Run the test executable directly, or enter `ctest` into your terminal.
+
+
+## License
+
+The library is licensed under the MIT license.
+
+While it's not a requirement whatsoever, it'd be pretty neat if you told me that you found the library useful :-)
+
+
+## Example
+
+The following is an adapted equivalent of the `strlen` implementation within the RISC-V bit manipulation extension specification.
+For brevity, it has been condensed to only handle little-endian platforms.
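+
+Biscuit only emits the encoded instructions into memory that the caller
+provides; it does not allocate executable memory itself. As a rough sketch of
+how such a buffer might be obtained on a POSIX system (`allocate_code_buffer`
+is a hypothetical helper for this illustration, not part of the library):
+
+```cpp
+#include <sys/mman.h>
+
+#include <cstddef>
+#include <cstdint>
+
+// Maps a readable/writable/executable region for generated code. Production
+// code would usually map W^X and flip protections with mprotect() instead.
+uint8_t* allocate_code_buffer(size_t size) {
+    void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    return ptr == MAP_FAILED ? nullptr : static_cast<uint8_t*>(ptr);
+}
+```
+
+With a buffer like that in hand, the example itself looks as follows.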
+
+```cpp
+// We prepare some contiguous buffer and give the pointer to the beginning
+// of the data and the total size of the buffer in bytes to the assembler.
+
+void strlen_example(uint8_t* buffer, size_t buffer_size) {
+    using namespace biscuit;
+
+    constexpr int ptrlog = 3;
+    constexpr int szreg = 8;
+
+    Assembler as(buffer, buffer_size);
+    Label done;
+    Label loop;
+
+    as.ANDI(a3, a0, szreg - 1);  // Offset
+    as.ANDI(a1, a0, 0xFF8);      // Align pointer
+
+    as.LI(a4, szreg);
+    as.SUB(a4, a4, a3);          // XLEN - offset
+    as.SLLI(a3, a3, ptrlog);     // offset * 8
+    as.LD(a2, 0, a1);            // Chunk
+
+    //
+    // Shift the partial/unaligned chunk we loaded to remove the bytes
+    // from before the start of the string, adding NUL bytes at the end.
+    //
+    as.SRL(a2, a2, a3);          // chunk >> (offset * 8)
+    as.ORCB(a2, a2);
+    as.NOT(a2, a2);
+
+    // Non-NUL bytes in the string have been expanded to 0x00, while
+    // NUL bytes have become 0xff. Search for the first set bit
+    // (corresponding to a NUL byte in the original chunk).
+    as.CTZ(a2, a2);
+
+    // The first chunk is special: compare against the number of valid
+    // bytes in this chunk.
+    as.SRLI(a0, a2, 3);
+    as.BGTU(a4, a0, &done);
+    as.ADDI(a3, a1, szreg);
+    as.LI(a4, -1);
+
+    // Our critical loop is 4 instructions and processes data in 4 byte
+    // or 8 byte chunks.
+    as.Bind(&loop);
+
+    as.LD(a2, szreg, a1);
+    as.ADDI(a1, a1, szreg);
+    as.ORCB(a2, a2);
+    as.BEQ(a2, a4, &loop);
+
+    as.NOT(a2, a2);
+    as.CTZ(a2, a2);
+    as.SUB(a1, a1, a3);
+    as.ADD(a0, a0, a1);
+    as.SRLI(a2, a2, 3);
+    as.ADD(a0, a0, a2);
+
+    as.Bind(&done);
+
+    as.RET();
+}
+```
diff --git a/clang-format b/clang-format
new file mode 100644
index 00000000..1c6b71b2
--- /dev/null
+++ b/clang-format
@@ -0,0 +1,88 @@
+---
+Language: Cpp
+# BasedOnStyle: LLVM
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: false
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass: false
+  AfterControlStatement: false
+  AfterEnum: false
+  AfterFunction: false
+  AfterNamespace: false
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  BeforeCatch: false
+  BeforeElse: false
+  IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit: 100
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeCategories:
+  - Regex: '^\<[^Q][^/.>]*\>'
+    Priority: -2
+  - Regex: '^\<'
+    Priority: -1
+  - Regex: '^\"'
+    Priority: 0
+IndentCaseLabels: false
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 150
+PointerAlignment: Left
+ReflowComments: true
+SortIncludes: true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
+...
diff --git a/cmake/biscuit-config.cmake.in b/cmake/biscuit-config.cmake.in
new file mode 100644
index 00000000..46b180ab
--- /dev/null
+++ b/cmake/biscuit-config.cmake.in
@@ -0,0 +1,5 @@
+@PACKAGE_INIT@
+
+include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")
+
+check_required_components(@PROJECT_NAME@)
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 00000000..8b3246cf
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(cpuinfo)
diff --git a/examples/cpuinfo/CMakeLists.txt b/examples/cpuinfo/CMakeLists.txt
new file mode 100644
index 00000000..16e6e4d0
--- /dev/null
+++ b/examples/cpuinfo/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(cpuinfo cpuinfo.cpp)
+target_link_libraries(cpuinfo biscuit)
+set_property(TARGET cpuinfo PROPERTY CXX_STANDARD 20)
diff --git a/examples/cpuinfo/cpuinfo.cpp b/examples/cpuinfo/cpuinfo.cpp
new file mode 100644
index 00000000..450dfbdb
--- /dev/null
+++ b/examples/cpuinfo/cpuinfo.cpp
@@ -0,0 +1,31 @@
+// Copyright (c), 2022, KNS Group LLC (YADRO)
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include <bitset>
+#include <iostream>
+
+#include <biscuit/cpuinfo.hpp>
+
+using namespace biscuit;
+
+int main()
+{
+    CPUInfo cpu;
+
+    std::cout << "Has I:" << cpu.Has(RISCVExtension::I) << std::endl;
+    std::cout << "Has M:" << cpu.Has(RISCVExtension::M) << std::endl;
+    std::cout << "Has A:" << cpu.Has(RISCVExtension::A) << std::endl;
+    std::cout << "Has F:" << cpu.Has(RISCVExtension::F) << std::endl;
+    std::cout << "Has D:" << cpu.Has(RISCVExtension::D) << std::endl;
+    std::cout << "Has C:" << cpu.Has(RISCVExtension::C) << std::endl;
+    std::cout << "Has V:" << cpu.Has(RISCVExtension::V) << std::endl;
+
+    if (cpu.Has(RISCVExtension::V)) {
+        std::cout << "VLENB:" << cpu.GetVlenb() << std::endl;
+    }
+
+    return 0;
+}
diff --git a/include/biscuit/assembler.hpp b/include/biscuit/assembler.hpp
new file mode 100644
index 00000000..78673d3b
--- /dev/null
+++ b/include/biscuit/assembler.hpp
@@ -0,0 +1,1475 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <biscuit/code_buffer.hpp>
+#include <biscuit/csr.hpp>
+#include <biscuit/isa.hpp>
+#include <biscuit/label.hpp>
+#include <biscuit/registers.hpp>
+#include <biscuit/vector.hpp>
+
+namespace biscuit {
+
+/**
+ * Defines the set of features that a particular assembler instance
+ * would like to assemble for.
+ *
+ * This allows for assertions and extra logic checking to be done.
+ *
+ * It can also affect various behaviors. e.g. LI, shifts, etc.
+ * will take these into account to adjust for emission on different
+ * environments transparently.
+ */
+enum class ArchFeature : uint32_t {
+    RV32,  //< 32-bit RISC-V
+    RV64,  //< 64-bit RISC-V
+    RV128, //< 128-bit RISC-V
+};
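+
+// Editor's illustrative sketch (not part of the upstream header): the selected
+// ArchFeature changes how pseudo-instructions such as LI are expanded, so the
+// same call can emit different sequences per target. Assuming a caller-provided
+// buffer `buf` of `size` bytes:
+//
+//     biscuit::Assembler rv64{buf, size};  // defaults to ArchFeature::RV64
+//     rv64.LI(biscuit::x5, UINT64_C(0x123456789ABC));
+//
+//     biscuit::Assembler rv32{buf, size, biscuit::ArchFeature::RV32};
+//     rv32.LI(biscuit::x5, 0x12345678);    // expanded for a 32-bit target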
+
+/**
+ * Code generator for RISC-V code.
+ *
+ * User code may inherit from this class in order to make use of
+ * the API more conveniently, or use it separately if desired.
+ */
+class Assembler {
+public:
+    /**
+     * Constructor
+     *
+     * Initializes the underlying code buffer to be able to hold `capacity` bytes.
+     *
+     * @param capacity The capacity for the underlying code buffer in bytes.
+     *                 If no capacity is specified, then the underlying buffer
+     *                 will be 4KB in size.
+     *
+     * @note Will assume to be assembling for RV64 unless changed.
+     */
+    [[nodiscard]] explicit Assembler(size_t capacity = CodeBuffer::default_capacity);
+
+    /**
+     * Constructor
+     *
+     * @param buffer   A non-null pointer to an allocated buffer of size `capacity`.
+     * @param capacity The capacity of the memory pointed to by `buffer`.
+     * @param features Architectural features to make the assembler aware of.
+     *
+     * @pre The given memory buffer must not be null.
+     * @pre The given memory buffer must be at minimum `capacity` bytes in size.
+     *
+     * @note The caller is responsible for managing the lifetime of the given memory.
+     *       CodeBuffer will *not* free the memory once it goes out of scope.
+     */
+    [[nodiscard]] explicit Assembler(uint8_t* buffer, size_t capacity,
+                                     ArchFeature features = ArchFeature::RV64);
+
+    // Copy constructor and assignment.
+    Assembler(const Assembler&) = delete;
+    Assembler& operator=(const Assembler&) = delete;
+
+    // Move constructor and assignment.
+    Assembler(Assembler&&) = default;
+    Assembler& operator=(Assembler&&) = default;
+
+    // Destructor
+    virtual ~Assembler();
+
+    /**
+     * Tells the assembler what features to take into account.
+     *
+     * Will alter how some code is emitted and also enforce asserts suitable
+     * for those particular features.
+     */
+    void SetArchFeatures(ArchFeature features) noexcept {
+        m_features = features;
+    }
+
+    /// Gets the underlying code buffer being managed by this assembler.
+    CodeBuffer& GetCodeBuffer();
+
+    /**
+     * Allows swapping out the code buffer used by the assembler.
+     *
+     * @param buffer The new buffer for the assembler to emit code into.
+     *
+     * @returns The old buffer that the assembler made use of.
+     */
+    CodeBuffer SwapCodeBuffer(CodeBuffer&& buffer) noexcept;
+
+    /**
+     * Allows rewinding of the code buffer cursor.
+     *
+     * @param offset The offset to rewind the cursor by.
+     *
+     * @note If no offset is provided, then this function rewinds the
+     *       cursor to the beginning of the buffer.
+     *
+     * @note The offset may not be larger than the current cursor offset
+     *       and may not be less than the current buffer starting address.
+     */
+    void RewindBuffer(ptrdiff_t offset = 0) {
+        m_buffer.RewindCursor(offset);
+    }
+
+    /// Retrieves the cursor pointer for the underlying code buffer.
+    [[nodiscard]] uint8_t* GetCursorPointer() noexcept {
+        return m_buffer.GetCursorPointer();
+    }
+
+    /// Retrieves the cursor for the underlying code buffer.
+    [[nodiscard]] const uint8_t* GetCursorPointer() const noexcept {
+        return m_buffer.GetCursorPointer();
+    }
+
+    /// Retrieves the pointer to an arbitrary location within the underlying code buffer.
+    [[nodiscard]] uint8_t* GetBufferPointer(ptrdiff_t offset) noexcept {
+        return m_buffer.GetOffsetPointer(offset);
+    }
+
+    /// Retrieves the pointer to an arbitrary location within the underlying code buffer.
+    [[nodiscard]] const uint8_t* GetBufferPointer(ptrdiff_t offset) const noexcept {
+        return m_buffer.GetOffsetPointer(offset);
+    }
+
+    /**
+     * Binds a label to the current offset within the code buffer.
+     *
+     * @param label A non-null valid label to bind.
+     */
+    void Bind(Label* label);
+
+    // RV32I Instructions
+
+    void ADD(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void ADDI(GPR rd, GPR rs, int32_t imm) noexcept;
+    void AND(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void ANDI(GPR rd, GPR rs, uint32_t imm) noexcept;
+
+    void AUIPC(GPR rd, int32_t imm) noexcept;
+
+    void BEQ(GPR rs1, GPR rs2, Label* label) noexcept;
+    void BEQZ(GPR rs, Label* label) noexcept;
+    void BGE(GPR rs1, GPR rs2, Label* label) noexcept;
+    void BGEU(GPR rs1, GPR rs2, Label* label) noexcept;
+    void BGEZ(GPR rs, Label* label) noexcept;
+    void BGT(GPR rs, GPR rt, Label* label) noexcept;
+    void BGTU(GPR rs, GPR rt, Label* label) noexcept;
+    void BGTZ(GPR rs, Label* label) noexcept;
+    void BLE(GPR rs, GPR rt, Label* label) noexcept;
+    void BLEU(GPR rs, GPR rt, Label* label) noexcept;
+    void BLEZ(GPR rs, Label* label) noexcept;
+    void BLT(GPR rs1, GPR rs2, Label* label) noexcept;
+    void BLTU(GPR rs1, GPR rs2, Label* label) noexcept;
+    void BLTZ(GPR rs, Label* label) noexcept;
+    void BNE(GPR rs1, GPR rs2, Label* label) noexcept;
+    void BNEZ(GPR rs, Label* label) noexcept;
+
+    void BEQ(GPR rs1, GPR rs2, int32_t imm) noexcept;
+    void BEQZ(GPR rs, int32_t imm) noexcept;
+    void BGE(GPR rs1, GPR rs2, int32_t imm) noexcept;
+    void BGEU(GPR rs1, GPR rs2, int32_t imm) noexcept;
+    void BGEZ(GPR rs, int32_t imm) noexcept;
+    void BGT(GPR rs, GPR rt, int32_t imm) noexcept;
+    void BGTU(GPR rs, GPR rt, int32_t imm) noexcept;
+    void BGTZ(GPR rs, int32_t imm) noexcept;
+    void BLE(GPR rs, GPR rt, int32_t imm) noexcept;
+    void BLEU(GPR rs, GPR rt, int32_t imm) noexcept;
+    void BLEZ(GPR rs, int32_t imm) noexcept;
+    void BLT(GPR rs1, GPR rs2, int32_t imm) noexcept;
+    void BLTU(GPR rs1, GPR rs2, int32_t imm) noexcept;
+    void BLTZ(GPR rs, int32_t imm) noexcept;
+    void BNE(GPR rs1, GPR rs2, int32_t imm) noexcept;
+    void BNEZ(GPR rs, int32_t imm) noexcept;
+
+    void CALL(int32_t offset) noexcept;
+
+    void EBREAK() noexcept;
+    void ECALL() noexcept;
+
+    void FENCE() noexcept;
+    void FENCE(FenceOrder pred, FenceOrder succ) noexcept;
+    void FENCEI(GPR rd = x0, GPR rs = x0, uint32_t imm = 0) noexcept;
+    void FENCETSO() noexcept;
+
+    void J(Label* label) noexcept;
+    void JAL(Label* label) noexcept;
+    void JAL(GPR rd, Label* label) noexcept;
+
+    void J(int32_t imm) noexcept;
+    void JAL(int32_t imm) noexcept;
+    void JAL(GPR rd, int32_t imm) noexcept;
+    void JALR(GPR rs) noexcept;
+    void JALR(GPR rd, int32_t imm, GPR rs1) noexcept;
+    void JR(GPR rs) noexcept;
+
+    void LB(GPR rd, int32_t imm, GPR rs) noexcept;
+    void LBU(GPR rd, int32_t imm, GPR rs) noexcept;
+    void LH(GPR rd, int32_t imm, GPR rs) noexcept;
+    void LHU(GPR rd, int32_t imm, GPR rs) noexcept;
+    void LI(GPR rd, uint64_t imm) noexcept;
+    void LUI(GPR rd, uint32_t imm) noexcept;
+    void LW(GPR rd, int32_t imm, GPR rs) noexcept;
+
+    void MV(GPR rd, GPR rs) noexcept;
+    void NEG(GPR rd, GPR rs) noexcept;
+
+    void NOP() noexcept;
+
+    void NOT(GPR rd, GPR rs) noexcept;
+    void OR(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void ORI(GPR rd, GPR rs, uint32_t imm) noexcept;
+
+    void PAUSE() noexcept;
+    void RET() noexcept;
+
+    void SB(GPR rs2, int32_t imm, GPR rs1) noexcept;
+    void SH(GPR rs2, int32_t imm, GPR rs1) noexcept;
+    void SW(GPR rs2, int32_t imm, GPR rs1) noexcept;
+
+    void SEQZ(GPR rd, GPR rs) noexcept;
+    void SGTZ(GPR rd, GPR rs) noexcept;
+
+    void SLL(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void SLLI(GPR rd, GPR rs, uint32_t shift) noexcept;
+
+    void SLT(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void SLTI(GPR rd, GPR rs, int32_t imm) noexcept;
+    void SLTIU(GPR rd, GPR rs, int32_t imm) noexcept;
+    void SLTU(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void SLTZ(GPR rd, GPR rs) noexcept;
+
+    void SNEZ(GPR rd, GPR rs) noexcept;
+
+    void SRA(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void SRAI(GPR rd, GPR rs, uint32_t shift) noexcept;
+
+    void SRL(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void SRLI(GPR rd, GPR rs, uint32_t shift) noexcept;
+
+    void SUB(GPR rd, GPR lhs, GPR rhs) noexcept;
+
+    void XOR(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void XORI(GPR rd, GPR rs, uint32_t imm) noexcept;
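+
+    // Editor's illustrative sketch (not part of the upstream header): labels
+    // tie branch targets to offsets in the emitted code. A label may be used
+    // by a branch before it is bound; Bind() later resolves the pending
+    // branch offsets, as the strlen example in the README does with `done`:
+    //
+    //     Label skip;
+    //     as.BEQZ(a0, &skip);  // forward branch to a not-yet-bound label
+    //     as.ADDI(a1, a1, 1);  // skipped when a0 == 0
+    //     as.Bind(&skip);      // the branch above now targets this offset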
+
+    // RV64I Base Instruction Set
+
+    void ADDIW(GPR rd, GPR rs, int32_t imm) noexcept;
+    void ADDW(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void LD(GPR rd, int32_t imm, GPR rs) noexcept;
+    void LWU(GPR rd, int32_t imm, GPR rs) noexcept;
+    void SD(GPR rs2, int32_t imm, GPR rs1) noexcept;
+
+    void SLLIW(GPR rd, GPR rs, uint32_t shift) noexcept;
+    void SRAIW(GPR rd, GPR rs, uint32_t shift) noexcept;
+    void SRLIW(GPR rd, GPR rs, uint32_t shift) noexcept;
+
+    void SLLW(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void SRAW(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void SRLW(GPR rd, GPR lhs, GPR rhs) noexcept;
+    void SUBW(GPR rd, GPR lhs, GPR rhs) noexcept;
+
+    // Zawrs Extension Instructions
+    void WRS_NTO() noexcept;
+    void WRS_STO() noexcept;
+
+    // Zacas Extension Instructions
+    //
+    // NOTE: If targeting RV32 and using AMOCAS.D, rd and rs2 must be even-numbered
+    //       registers, since they both indicate a register pair.
+    //
+    //       On RV64, even and odd numbered registers can be used.
+    //
+    //       On both RV32 and RV64, AMOCAS.Q requires rd and rs2 to be even-numbered
+    //       since it also treats them like their own register pairs.
+
+    void AMOCAS_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOCAS_Q(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOCAS_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
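+
+    // Editor's illustrative sketch (not part of the upstream header): on RV32,
+    // AMOCAS.D operates on a register pair, so rd and rs2 must be even-numbered
+    // (rd holds the expected value and receives the old memory value, rs2 holds
+    // the desired value, rs1 the address):
+    //
+    //     as.AMOCAS_D(Ordering::AQRL, x10, x12, x14);  // rd/rs2 even: valid on RV32
+    //     as.AMOCAS_D(Ordering::AQRL, x11, x13, x14);  // odd pair: RV64-only usage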
+
+    // Zicond Extension Instructions
+    void CZERO_EQZ(GPR rd, GPR value, GPR condition) noexcept;
+    void CZERO_NEZ(GPR rd, GPR value, GPR condition) noexcept;
+
+    // Zicsr Extension Instructions
+
+    void CSRRC(GPR rd, CSR csr, GPR rs) noexcept;
+    void CSRRCI(GPR rd, CSR csr, uint32_t imm) noexcept;
+    void CSRRS(GPR rd, CSR csr, GPR rs) noexcept;
+    void CSRRSI(GPR rd, CSR csr, uint32_t imm) noexcept;
+    void CSRRW(GPR rd, CSR csr, GPR rs) noexcept;
+    void CSRRWI(GPR rd, CSR csr, uint32_t imm) noexcept;
+
+    void CSRR(GPR rd, CSR csr) noexcept;
+    void CSRW(CSR csr, GPR rs) noexcept;
+
+    void CSRS(CSR csr, GPR rs) noexcept;
+    void CSRC(CSR csr, GPR rs) noexcept;
+
+    void CSRCI(CSR csr, uint32_t imm) noexcept;
+    void CSRSI(CSR csr, uint32_t imm) noexcept;
+    void CSRWI(CSR csr, uint32_t imm) noexcept;
+
+    void FRCSR(GPR rd) noexcept;
+    void FSCSR(GPR rd, GPR rs) noexcept;
+    void FSCSR(GPR rs) noexcept;
+
+    void FRRM(GPR rd) noexcept;
+    void FSRM(GPR rd, GPR rs) noexcept;
+    void FSRM(GPR rs) noexcept;
+
+    void FSRMI(GPR rd, uint32_t imm) noexcept;
+    void FSRMI(uint32_t imm) noexcept;
+
+    void FRFLAGS(GPR rd) noexcept;
+    void FSFLAGS(GPR rd, GPR rs) noexcept;
+    void FSFLAGS(GPR rs) noexcept;
+
+    void FSFLAGSI(GPR rd, uint32_t imm) noexcept;
+    void FSFLAGSI(uint32_t imm) noexcept;
+
+    void RDCYCLE(GPR rd) noexcept;
+    void RDCYCLEH(GPR rd) noexcept;
+
+    void RDINSTRET(GPR rd) noexcept;
+    void RDINSTRETH(GPR rd) noexcept;
+
+    void RDTIME(GPR rd) noexcept;
+    void RDTIMEH(GPR rd) noexcept;
+
+    // Zihintntl Extension Instructions
+
+    void C_NTL_ALL() noexcept;
+    void C_NTL_S1() noexcept;
+    void C_NTL_P1() noexcept;
+    void C_NTL_PALL() noexcept;
+    void NTL_ALL() noexcept;
+    void NTL_S1() noexcept;
+    void NTL_P1() noexcept;
+    void NTL_PALL() noexcept;
+
+    // RV32M Extension Instructions
+
+    void DIV(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void DIVU(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void MUL(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void MULH(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void MULHSU(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void MULHU(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void REM(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void REMU(GPR rd, GPR rs1, GPR rs2) noexcept;
+
+    // RV64M Extension Instructions
+
+    void DIVW(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void DIVUW(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void MULW(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void REMW(GPR rd, GPR rs1, GPR rs2) noexcept;
+    void REMUW(GPR rd, GPR rs1, GPR rs2) noexcept;
+
+    // RV32A Extension Instructions
+
+    void AMOADD_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOAND_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOMAX_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOMAXU_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOMIN_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOMINU_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOOR_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOSWAP_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOXOR_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void LR_W(Ordering ordering, GPR rd, GPR rs) noexcept;
+    void SC_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+
+    // RV64A Extension Instructions
+
+    void AMOADD_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOAND_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOMAX_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOMAXU_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOMIN_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOMINU_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOOR_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOSWAP_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void AMOXOR_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+    void LR_D(Ordering ordering, GPR rd, GPR rs) noexcept;
+    void SC_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept;
+
+    // RV32F Extension Instructions
+
+    void FADD_S(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept;
+    void FCLASS_S(GPR rd, FPR rs1) noexcept;
+    void FCVT_S_W(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept;
+    void FCVT_S_WU(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept;
+    void FCVT_W_S(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept;
+    void FCVT_WU_S(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept;
+    void FDIV_S(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept;
+    void FEQ_S(GPR rd, FPR rs1, FPR rs2) noexcept;
+    void FLE_S(GPR rd, FPR rs1, FPR rs2) noexcept;
+    void FLT_S(GPR rd, FPR rs1, FPR rs2) noexcept;
+    void FLW(FPR rd, int32_t offset, GPR rs) noexcept;
+    void FMADD_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept;
+    void FMAX_S(FPR rd, FPR rs1, FPR rs2) noexcept;
+    void FMIN_S(FPR rd, FPR rs1, FPR rs2) noexcept;
+    void FMSUB_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept;
+    void FMUL_S(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept;
+    void FMV_W_X(FPR rd, GPR rs1) noexcept;
+    void FMV_X_W(GPR rd, FPR rs1) noexcept;
+    void FNMADD_S(FPR rd, FPR rs1, FPR
rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FNMSUB_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FSGNJ_S(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSGNJN_S(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSGNJX_S(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSQRT_S(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FSUB_S(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FSW(FPR rs2, int32_t offset, GPR rs1) noexcept; + + void FABS_S(FPR rd, FPR rs) noexcept; + void FMV_S(FPR rd, FPR rs) noexcept; + void FNEG_S(FPR rd, FPR rs) noexcept; + + // RV64F Extension Instructions + + void FCVT_L_S(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_LU_S(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_S_L(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_S_LU(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + + // RV32D Extension Instructions + + void FADD_D(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FCLASS_D(GPR rd, FPR rs1) noexcept; + void FCVT_D_W(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_D_WU(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_W_D(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_WU_D(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_D_S(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_S_D(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FDIV_D(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FEQ_D(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLE_D(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLT_D(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLD(FPR rd, int32_t offset, GPR rs) noexcept; + void FMADD_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FMAX_D(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMIN_D(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMSUB_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FMUL_D(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FNMADD_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FNMSUB_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FSGNJ_D(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSGNJN_D(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSGNJX_D(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSQRT_D(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FSUB_D(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FSD(FPR rs2, int32_t offset, GPR rs1) noexcept; + + void FABS_D(FPR rd, FPR rs) noexcept; + void FMV_D(FPR rd, FPR rs) noexcept; + void FNEG_D(FPR rd, FPR rs) noexcept; + + // RV64D Extension Instructions + + void FCVT_L_D(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_LU_D(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_D_L(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_D_LU(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FMV_D_X(FPR rd, GPR rs1) noexcept; + void FMV_X_D(GPR rd, FPR rs1) noexcept; + + // RV32Q Extension Instructions + + void FADD_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FCLASS_Q(GPR rd, FPR rs1) noexcept; + void FCVT_Q_W(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_Q_WU(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_W_Q(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void 
FCVT_WU_Q(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_Q_D(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_D_Q(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_Q_S(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_S_Q(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FDIV_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FEQ_Q(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLE_Q(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLT_Q(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLQ(FPR rd, int32_t offset, GPR rs) noexcept; + void FMADD_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FMAX_Q(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMIN_Q(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMSUB_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FMUL_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FNMADD_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FNMSUB_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FSGNJ_Q(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSGNJN_Q(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSGNJX_Q(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSQRT_Q(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FSUB_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FSQ(FPR rs2, int32_t offset, GPR rs1) noexcept; + + void FABS_Q(FPR rd, FPR rs) noexcept; + void FMV_Q(FPR rd, FPR rs) noexcept; + void FNEG_Q(FPR rd, FPR rs) noexcept; + + // RV64Q Extension Instructions + + void FCVT_L_Q(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_LU_Q(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_Q_L(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_Q_LU(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + + // RV32Zfh Extension Instructions + + void FADD_H(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FCLASS_H(GPR rd, FPR rs1) noexcept; + void FCVT_D_H(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_H_D(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_H_Q(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_H_S(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_H_W(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_H_WU(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_Q_H(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_S_H(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_W_H(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_WU_H(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FDIV_H(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FEQ_H(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLE_H(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLH(FPR rd, int32_t offset, GPR rs) noexcept; + void FLT_H(GPR rd, FPR rs1, FPR rs2) noexcept; + void FMADD_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FMAX_H(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMIN_H(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMSUB_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FMUL_H(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + void FMV_H_X(FPR rd, GPR rs1) noexcept; + void FMV_X_H(GPR rd, FPR rs1) noexcept; + void FNMADD_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = 
RMode::DYN) noexcept; + void FNMSUB_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode = RMode::DYN) noexcept; + void FSGNJ_H(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSGNJN_H(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSGNJX_H(FPR rd, FPR rs1, FPR rs2) noexcept; + void FSH(FPR rs2, int32_t offset, GPR rs1) noexcept; + void FSQRT_H(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FSUB_H(FPR rd, FPR rs1, FPR rs2, RMode rmode = RMode::DYN) noexcept; + + // RV64Zfh Extension Instructions + + void FCVT_L_H(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_LU_H(GPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_H_L(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + void FCVT_H_LU(FPR rd, GPR rs1, RMode rmode = RMode::DYN) noexcept; + + // Zfa Extension Instructions + + void FLI_D(FPR rd, double value) noexcept; + void FLI_H(FPR rd, double value) noexcept; + void FLI_S(FPR rd, double value) noexcept; + + void FMINM_D(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMINM_H(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMINM_Q(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMINM_S(FPR rd, FPR rs1, FPR rs2) noexcept; + + void FMAXM_D(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMAXM_H(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMAXM_Q(FPR rd, FPR rs1, FPR rs2) noexcept; + void FMAXM_S(FPR rd, FPR rs1, FPR rs2) noexcept; + + void FROUND_D(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FROUND_H(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FROUND_Q(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FROUND_S(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + + void FROUNDNX_D(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FROUNDNX_H(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FROUNDNX_Q(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + void FROUNDNX_S(FPR rd, FPR rs1, RMode rmode = RMode::DYN) noexcept; + + void FCVTMOD_W_D(GPR rd, FPR rs1) noexcept; + + void FMVH_X_D(GPR rd, FPR rs1) noexcept; + void FMVH_X_Q(GPR rd, FPR rs1) noexcept; + void FMVP_D_X(FPR rd, GPR rs1, GPR rs2) noexcept; + void FMVP_Q_X(FPR rd, GPR rs1, GPR rs2) noexcept; + + void FLEQ_D(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLTQ_D(GPR rd, FPR rs1, FPR rs2) noexcept; + + void FLEQ_H(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLTQ_H(GPR rd, FPR rs1, FPR rs2) noexcept; + + void FLEQ_Q(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLTQ_Q(GPR rd, FPR rs1, FPR rs2) noexcept; + + void FLEQ_S(GPR rd, FPR rs1, FPR rs2) noexcept; + void FLTQ_S(GPR rd, FPR rs1, FPR rs2) noexcept; + + // Zfbfmin Extension Instructions + + void FCVT_BF16_S(FPR rd, FPR rs, RMode rmode = RMode::DYN) noexcept; + void FCVT_S_BF16(FPR rd, FPR rs, RMode rmode = RMode::DYN) noexcept; + + // RVB Extension Instructions (plus scalar crypto bit operations) + + void ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept; + void ANDN(GPR rd, GPR rs1, GPR rs2) noexcept; + void BCLR(GPR rd, GPR rs1, GPR rs2) noexcept; + void BCLRI(GPR rd, GPR rs, uint32_t bit) noexcept; + void BEXT(GPR rd, GPR rs1, GPR rs2) noexcept; + void BEXTI(GPR rd, GPR rs, uint32_t bit) noexcept; + void BINV(GPR rd, GPR rs1, GPR rs2) noexcept; + void BINVI(GPR rd, GPR rs, uint32_t bit) noexcept; + void BREV8(GPR rd, GPR rs) noexcept; + void BSET(GPR rd, GPR rs1, GPR rs2) noexcept; + void BSETI(GPR rd, GPR rs, uint32_t bit) noexcept; + void CLMUL(GPR rd, GPR rs1, GPR rs2) noexcept; + void CLMULH(GPR rd, GPR rs1, GPR rs2) noexcept; + void CLMULR(GPR rd, GPR rs1, GPR rs2) noexcept; + void CLZ(GPR rd, GPR rs) 
noexcept; + void CLZW(GPR rd, GPR rs) noexcept; + void CPOP(GPR rd, GPR rs) noexcept; + void CPOPW(GPR rd, GPR rs) noexcept; + void CTZ(GPR rd, GPR rs) noexcept; + void CTZW(GPR rd, GPR rs) noexcept; + void MAX(GPR rd, GPR rs1, GPR rs2) noexcept; + void MAXU(GPR rd, GPR rs1, GPR rs2) noexcept; + void MIN(GPR rd, GPR rs1, GPR rs2) noexcept; + void MINU(GPR rd, GPR rs1, GPR rs2) noexcept; + void ORCB(GPR rd, GPR rs) noexcept; + void ORN(GPR rd, GPR rs1, GPR rs2) noexcept; + void PACK(GPR rd, GPR rs1, GPR rs2) noexcept; + void PACKH(GPR rd, GPR rs1, GPR rs2) noexcept; + void PACKW(GPR rd, GPR rs1, GPR rs2) noexcept; + void REV8(GPR rd, GPR rs) noexcept; + void ROL(GPR rd, GPR rs1, GPR rs2) noexcept; + void ROLW(GPR rd, GPR rs1, GPR rs2) noexcept; + void ROR(GPR rd, GPR rs1, GPR rs2) noexcept; + void RORI(GPR rd, GPR rs, uint32_t rotate_amount) noexcept; + void RORIW(GPR rd, GPR rs, uint32_t rotate_amount) noexcept; + void RORW(GPR rd, GPR rs1, GPR rs2) noexcept; + void SEXTB(GPR rd, GPR rs) noexcept; + void SEXTH(GPR rd, GPR rs) noexcept; + void SH1ADD(GPR rd, GPR rs1, GPR rs2) noexcept; + void SH1ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept; + void SH2ADD(GPR rd, GPR rs1, GPR rs2) noexcept; + void SH2ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept; + void SH3ADD(GPR rd, GPR rs1, GPR rs2) noexcept; + void SH3ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept; + void SLLIUW(GPR rd, GPR rs, uint32_t shift_amount) noexcept; + void UNZIP(GPR rd, GPR rs) noexcept; + void XNOR(GPR rd, GPR rs1, GPR rs2) noexcept; + void XPERM4(GPR rd, GPR rs1, GPR rs2) noexcept; + void XPERM8(GPR rd, GPR rs1, GPR rs2) noexcept; + void ZEXTH(GPR rd, GPR rs) noexcept; + void ZEXTW(GPR rd, GPR rs) noexcept; + void ZIP(GPR rd, GPR rs) noexcept; + + // Scalar Cryptography (RVK) instructions + + void AES32DSI(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept; + void AES32DSMI(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept; + void AES32ESI(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept; + void AES32ESMI(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept; + void AES64DS(GPR rd, GPR rs1, GPR rs2) noexcept; + void AES64DSM(GPR rd, GPR rs1, GPR rs2) noexcept; + void AES64ES(GPR rd, GPR rs1, GPR rs2) noexcept; + void AES64ESM(GPR rd, GPR rs1, GPR rs2) noexcept; + void AES64IM(GPR rd, GPR rs) noexcept; + void AES64KS1I(GPR rd, GPR rs, uint32_t rnum) noexcept; + void AES64KS2(GPR rd, GPR rs1, GPR rs2) noexcept; + void SHA256SIG0(GPR rd, GPR rs) noexcept; + void SHA256SIG1(GPR rd, GPR rs) noexcept; + void SHA256SUM0(GPR rd, GPR rs) noexcept; + void SHA256SUM1(GPR rd, GPR rs) noexcept; + void SHA512SIG0(GPR rd, GPR rs) noexcept; + void SHA512SIG0H(GPR rd, GPR rs1, GPR rs2) noexcept; + void SHA512SIG0L(GPR rd, GPR rs1, GPR rs2) noexcept; + void SHA512SIG1(GPR rd, GPR rs) noexcept; + void SHA512SIG1H(GPR rd, GPR rs1, GPR rs2) noexcept; + void SHA512SIG1L(GPR rd, GPR rs1, GPR rs2) noexcept; + void SHA512SUM0(GPR rd, GPR rs) noexcept; + void SHA512SUM0R(GPR rd, GPR rs1, GPR rs2) noexcept; + void SHA512SUM1(GPR rd, GPR rs) noexcept; + void SHA512SUM1R(GPR rd, GPR rs1, GPR rs2) noexcept; + void SM3P0(GPR rd, GPR rs) noexcept; + void SM3P1(GPR rd, GPR rs) noexcept; + void SM4ED(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept; + void SM4KS(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept; + + // RVC Extension Instructions + + void C_ADD(GPR rd, GPR rs) noexcept; + void C_ADDI(GPR rd, int32_t imm) noexcept; + void C_ADDIW(GPR rd, int32_t imm) noexcept; + void C_ADDI4SPN(GPR rd, uint32_t imm) noexcept; + void C_ADDI16SP(int32_t imm) noexcept; + void C_ADDW(GPR rd, 
GPR rs) noexcept; + void C_AND(GPR rd, GPR rs) noexcept; + void C_ANDI(GPR rd, uint32_t imm) noexcept; + void C_BEQZ(GPR rs, int32_t offset) noexcept; + void C_BEQZ(GPR rs, Label* label) noexcept; + void C_BNEZ(GPR rs, int32_t offset) noexcept; + void C_BNEZ(GPR rs, Label* label) noexcept; + void C_EBREAK() noexcept; + void C_FLD(FPR rd, uint32_t imm, GPR rs) noexcept; + void C_FLDSP(FPR rd, uint32_t imm) noexcept; + void C_FLW(FPR rd, uint32_t imm, GPR rs) noexcept; + void C_FLWSP(FPR rd, uint32_t imm) noexcept; + void C_FSD(FPR rs2, uint32_t imm, GPR rs1) noexcept; + void C_FSDSP(FPR rs, uint32_t imm) noexcept; + void C_FSW(FPR rs2, uint32_t imm, GPR rs1) noexcept; + void C_FSWSP(FPR rs, uint32_t imm) noexcept; + void C_J(int32_t offset) noexcept; + void C_J(Label* label) noexcept; + void C_JAL(Label* label) noexcept; + void C_JAL(int32_t offset) noexcept; + void C_JALR(GPR rs) noexcept; + void C_JR(GPR rs) noexcept; + void C_LD(GPR rd, uint32_t imm, GPR rs) noexcept; + void C_LDSP(GPR rd, uint32_t imm) noexcept; + void C_LI(GPR rd, int32_t imm) noexcept; + void C_LQ(GPR rd, uint32_t imm, GPR rs) noexcept; + void C_LQSP(GPR rd, uint32_t imm) noexcept; + void C_LUI(GPR rd, uint32_t imm) noexcept; + void C_LW(GPR rd, uint32_t imm, GPR rs) noexcept; + void C_LWSP(GPR rd, uint32_t imm) noexcept; + void C_MV(GPR rd, GPR rs) noexcept; + void C_NOP() noexcept; + void C_OR(GPR rd, GPR rs) noexcept; + void C_SD(GPR rs2, uint32_t imm, GPR rs1) noexcept; + void C_SDSP(GPR rs, uint32_t imm) noexcept; + void C_SLLI(GPR rd, uint32_t shift) noexcept; + void C_SQ(GPR rs2, uint32_t imm, GPR rs1) noexcept; + void C_SQSP(GPR rs, uint32_t imm) noexcept; + void C_SRAI(GPR rd, uint32_t shift) noexcept; + void C_SRLI(GPR rd, uint32_t shift) noexcept; + void C_SUB(GPR rd, GPR rs) noexcept; + void C_SUBW(GPR rd, GPR rs) noexcept; + void C_SW(GPR rs2, uint32_t imm, GPR rs1) noexcept; + void C_SWSP(GPR rs, uint32_t imm) noexcept; + void C_UNDEF() noexcept; + void C_XOR(GPR rd, GPR rs) noexcept; + + // Zc Extension Instructions + + void C_LBU(GPR rd, uint32_t uimm, GPR rs) noexcept; + void C_LH(GPR rd, uint32_t uimm, GPR rs) noexcept; + void C_LHU(GPR rd, uint32_t uimm, GPR rs) noexcept; + void C_SB(GPR rs2, uint32_t uimm, GPR rs1) noexcept; + void C_SH(GPR rs2, uint32_t uimm, GPR rs1) noexcept; + + void C_SEXT_B(GPR rd) noexcept; + void C_SEXT_H(GPR rd) noexcept; + void C_ZEXT_B(GPR rd) noexcept; + void C_ZEXT_H(GPR rd) noexcept; + void C_ZEXT_W(GPR rd) noexcept; + + void C_MUL(GPR rsd, GPR rs2) noexcept; + void C_NOT(GPR rd) noexcept; + + void CM_MVA01S(GPR r1s, GPR r2s) noexcept; + void CM_MVSA01(GPR r1s, GPR r2s) noexcept; + + void CM_POP(PushPopList reg_list, int32_t stack_adj) noexcept; + void CM_POPRET(PushPopList reg_list, int32_t stack_adj) noexcept; + void CM_POPRETZ(PushPopList reg_list, int32_t stack_adj) noexcept; + void CM_PUSH(PushPopList reg_list, int32_t stack_adj) noexcept; + + void CM_JALT(uint32_t index) noexcept; + void CM_JT(uint32_t index) noexcept; + + // Cache Management Operation Extension Instructions + + void CBO_CLEAN(GPR rs) noexcept; + void CBO_FLUSH(GPR rs) noexcept; + void CBO_INVAL(GPR rs) noexcept; + void CBO_ZERO(GPR rs) noexcept; + void PREFETCH_I(GPR rs, int32_t offset = 0) noexcept; + void PREFETCH_R(GPR rs, int32_t offset = 0) noexcept; + void PREFETCH_W(GPR rs, int32_t offset = 0) noexcept; + + // Privileged Instructions + + void HFENCE_GVMA(GPR rs1, GPR rs2) noexcept; + void HFENCE_VVMA(GPR rs1, GPR rs2) noexcept; + void HINVAL_GVMA(GPR rs1, GPR rs2) noexcept; + void 
HINVAL_VVMA(GPR rs1, GPR rs2) noexcept; + void HLV_B(GPR rd, GPR rs) noexcept; + void HLV_BU(GPR rd, GPR rs) noexcept; + void HLV_D(GPR rd, GPR rs) noexcept; + void HLV_H(GPR rd, GPR rs) noexcept; + void HLV_HU(GPR rd, GPR rs) noexcept; + void HLV_W(GPR rd, GPR rs) noexcept; + void HLV_WU(GPR rd, GPR rs) noexcept; + void HLVX_HU(GPR rd, GPR rs) noexcept; + void HLVX_WU(GPR rd, GPR rs) noexcept; + void HSV_B(GPR rs2, GPR rs1) noexcept; + void HSV_D(GPR rs2, GPR rs1) noexcept; + void HSV_H(GPR rs2, GPR rs1) noexcept; + void HSV_W(GPR rs2, GPR rs1) noexcept; + void MRET() noexcept; + void SFENCE_INVAL_IR() noexcept; + void SFENCE_VMA(GPR rs1, GPR rs2) noexcept; + void SFENCE_W_INVAL() noexcept; + void SINVAL_VMA(GPR rs1, GPR rs2) noexcept; + void SRET() noexcept; + void URET() noexcept; + void WFI() noexcept; + + // Vector Extension Instructions + + // Vector Integer Instructions + + void VAADD(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VAADD(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VAADDU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VAADDU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VADC(Vec vd, Vec vs2, Vec vs1) noexcept; + void VADC(Vec vd, Vec vs2, GPR rs1) noexcept; + void VADC(Vec vd, Vec vs2, int32_t simm) noexcept; + + void VADD(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VADD(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VADD(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VAND(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VAND(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VAND(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VASUB(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VASUB(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VASUBU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VASUBU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VCOMPRESS(Vec vd, Vec vs2, Vec vs1) noexcept; + + void VDIV(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VDIV(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VDIVU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VDIVU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFIRST(GPR rd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VID(Vec vd, VecMask mask = VecMask::No) noexcept; + + void VIOTA(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VMACC(Vec vd, GPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VMADC(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMADC(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VMADC(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VMADD(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VMADD(Vec vd, GPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VMAND(Vec vd, Vec vs2, Vec vs1) noexcept; + void VMANDNOT(Vec vd, Vec vs2, Vec vs1) noexcept; + void VMNAND(Vec vd, Vec vs2, Vec vs1) noexcept; + void VMNOR(Vec vd, Vec vs2, Vec vs1) noexcept; + void VMOR(Vec vd, Vec vs2, Vec vs1) noexcept; + void VMORNOT(Vec vd, Vec vs2, Vec vs1) noexcept; + 
void VMXNOR(Vec vd, Vec vs2, Vec vs1) noexcept; + void VMXOR(Vec vd, Vec vs2, Vec vs1) noexcept; + + void VMAX(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMAX(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMAXU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMAXU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMERGE(Vec vd, Vec vs2, Vec vs1) noexcept; + void VMERGE(Vec vd, Vec vs2, GPR rs1) noexcept; + void VMERGE(Vec vd, Vec vs2, int32_t simm) noexcept; + + void VMIN(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMIN(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMINU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMINU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMSBC(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMSBC(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMSBF(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VMSIF(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VMSOF(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VMSEQ(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMSEQ(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VMSEQ(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VMSGT(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VMSGT(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VMSGTU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VMSGTU(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VMSLE(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMSLE(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VMSLE(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VMSLEU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMSLEU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VMSLEU(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VMSLT(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMSLT(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMSLTU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMSLTU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMSNE(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMSNE(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VMSNE(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMUL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMULH(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMULH(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMULHSU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMULHSU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMULHU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMULHU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMV(Vec vd, Vec vs1) noexcept; + void VMV(Vec vd, GPR rs1) noexcept; + void VMV(Vec vd, 
int32_t simm) noexcept; + + void VMV1R(Vec vd, Vec vs) noexcept; + void VMV2R(Vec vd, Vec vs) noexcept; + void VMV4R(Vec vd, Vec vs) noexcept; + void VMV8R(Vec vd, Vec vs) noexcept; + + void VMV_SX(Vec vd, GPR rs) noexcept; + void VMV_XS(GPR rd, Vec vs) noexcept; + + void VNCLIP(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VNCLIP(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VNCLIP(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VNCLIPU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VNCLIPU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VNCLIPU(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VNMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VNMSAC(Vec vd, GPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VNMSUB(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VNMSUB(Vec vd, GPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VNSRA(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VNSRA(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VNSRA(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VNSRL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VNSRL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VNSRL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VOR(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VOR(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VOR(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VPOPC(GPR rd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VREDAND(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREDMAX(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREDMAXU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREDMIN(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREDMINU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREDOR(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREDSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREDXOR(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + + void VREM(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREM(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VREMU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VREMU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VRGATHER(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VRGATHER(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VRGATHER(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VRGATHEREI16(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + + void VRSUB(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VRSUB(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VSADD(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSADD(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSADD(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VSADDU(Vec vd, Vec 
vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSADDU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSADDU(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VSBC(Vec vd, Vec vs2, Vec vs1) noexcept; + void VSBC(Vec vd, Vec vs2, GPR rs1) noexcept; + + void VSEXTVF2(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSEXTVF4(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSEXTVF8(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VSLIDE1DOWN(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSLIDEDOWN(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSLIDEDOWN(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VSLIDE1UP(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSLIDEUP(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSLIDEUP(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VSLL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSLL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSLL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VSMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSMUL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VSRA(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSRA(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSRA(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VSRL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSRL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSRL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VSSRA(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSSRA(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSSRA(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VSSRL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSSRL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VSSRL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VSSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSSUB(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VSSUBU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSSUBU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VSUB(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWADD(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWADD(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWADDW(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWADDW(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWADDU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWADDU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWADDUW(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWADDUW(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + 
void VWMACC(Vec vd, GPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VWMACCSU(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VWMACCSU(Vec vd, GPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VWMACCU(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VWMACCU(Vec vd, GPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VWMACCUS(Vec vd, GPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VWMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWMUL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWMULSU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWMULSU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWMULU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWMULU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWREDSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWREDSUMU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + + void VWSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWSUB(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWSUBW(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWSUBW(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWSUBU(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWSUBU(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VWSUBUW(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWSUBUW(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VXOR(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VXOR(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VXOR(Vec vd, Vec vs2, int32_t simm, VecMask mask = VecMask::No) noexcept; + + void VZEXTVF2(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VZEXTVF4(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VZEXTVF8(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + // Vector Floating-Point Instructions + + void VFADD(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFADD(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFCLASS(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VFCVT_F_X(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFCVT_F_XU(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFCVT_RTZ_X_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFCVT_RTZ_XU_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFCVT_X_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFCVT_XU_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VFNCVT_F_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFNCVT_F_X(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFNCVT_F_XU(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFNCVT_ROD_F_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFNCVT_RTZ_X_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFNCVT_RTZ_XU_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFNCVT_X_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFNCVT_XU_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VFWCVT_F_F(Vec vd, Vec vs, VecMask mask = 
VecMask::No) noexcept; + void VFWCVT_F_X(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFWCVT_F_XU(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFWCVT_RTZ_X_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFWCVT_RTZ_XU_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFWCVT_X_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFWCVT_XU_F(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VFDIV(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFDIV(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + void VFRDIV(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFREDMAX(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFREDMIN(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + + void VFREDSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFREDOSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + + void VFMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFMACC(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFMADD(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFMADD(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFMAX(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFMAX(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFMERGE(Vec vd, Vec vs2, FPR rs1) noexcept; + + void VFMIN(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFMIN(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFMSAC(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFMSUB(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFMSUB(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFMUL(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFMV(Vec vd, FPR rs) noexcept; + void VFMV_FS(FPR rd, Vec vs) noexcept; + void VFMV_SF(Vec vd, FPR rs) noexcept; + + void VFNMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFNMACC(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFNMADD(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFNMADD(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFNMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFNMSAC(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFNMSUB(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFNMSUB(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFREC7(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VFSGNJ(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFSGNJ(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFSGNJN(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFSGNJN(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFSGNJX(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFSGNJX(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFSQRT(Vec vd, Vec vs, VecMask mask = VecMask::No) 
noexcept; + void VFRSQRT7(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VFSLIDE1DOWN(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + void VFSLIDE1UP(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFSUB(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + void VFRSUB(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFWADD(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFWADD(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFWADDW(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFWADDW(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFWMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFWMACC(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFWMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFWMUL(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFWNMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFWNMACC(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFWNMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFWNMSAC(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFWREDSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFWREDOSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + + void VFWMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFWMSAC(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VFWSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFWSUB(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VFWSUBW(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VFWSUBW(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMFEQ(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMFEQ(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMFGE(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + void VMFGT(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMFLE(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMFLE(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMFLT(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMFLT(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + void VMFNE(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VMFNE(Vec vd, Vec vs2, FPR rs1, VecMask mask = VecMask::No) noexcept; + + // Vector Load/Store Instructions + + void VLE8(Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLE16(Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLE32(Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLE64(Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLM(Vec vd, GPR rs) noexcept; + + void VLSE8(Vec vd, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VLSE16(Vec vd, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VLSE32(Vec vd, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VLSE64(Vec vd, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + + void VLOXEI8(Vec vd, GPR rs, 
Vec vs, VecMask mask = VecMask::No) noexcept; + void VLOXEI16(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLOXEI32(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLOXEI64(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VLUXEI8(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLUXEI16(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLUXEI32(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLUXEI64(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VLE8FF(Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLE16FF(Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLE32FF(Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLE64FF(Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + + void VLSEGE8(uint32_t num_segments, Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLSEGE16(uint32_t num_segments, Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLSEGE32(uint32_t num_segments, Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + void VLSEGE64(uint32_t num_segments, Vec vd, GPR rs, VecMask mask = VecMask::No) noexcept; + + void VLSSEGE8(uint32_t num_segments, Vec vd, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VLSSEGE16(uint32_t num_segments, Vec vd, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VLSSEGE32(uint32_t num_segments, Vec vd, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VLSSEGE64(uint32_t num_segments, Vec vd, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + + void VLOXSEGEI8(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLOXSEGEI16(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLOXSEGEI32(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLOXSEGEI64(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VLUXSEGEI8(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLUXSEGEI16(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLUXSEGEI32(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VLUXSEGEI64(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VLRE8(uint32_t num_registers, Vec vd, GPR rs) noexcept; + void VL1RE8(Vec vd, GPR rs) noexcept; + void VL2RE8(Vec vd, GPR rs) noexcept; + void VL4RE8(Vec vd, GPR rs) noexcept; + void VL8RE8(Vec vd, GPR rs) noexcept; + + void VLRE16(uint32_t num_registers, Vec vd, GPR rs) noexcept; + void VL1RE16(Vec vd, GPR rs) noexcept; + void VL2RE16(Vec vd, GPR rs) noexcept; + void VL4RE16(Vec vd, GPR rs) noexcept; + void VL8RE16(Vec vd, GPR rs) noexcept; + + void VLRE32(uint32_t num_registers, Vec vd, GPR rs) noexcept; + void VL1RE32(Vec vd, GPR rs) noexcept; + void VL2RE32(Vec vd, GPR rs) noexcept; + void VL4RE32(Vec vd, GPR rs) noexcept; + void VL8RE32(Vec vd, GPR rs) noexcept; + + void VLRE64(uint32_t num_registers, Vec vd, GPR rs) noexcept; + void VL1RE64(Vec vd, GPR rs) noexcept; + void VL2RE64(Vec vd, GPR rs) noexcept; + void VL4RE64(Vec vd, GPR rs) noexcept; + void VL8RE64(Vec vd, GPR rs) noexcept; + + void VSE8(Vec vs, GPR rs, VecMask mask = VecMask::No) noexcept; + void VSE16(Vec vs, GPR rs, VecMask mask = VecMask::No) noexcept; + void 
VSE32(Vec vs, GPR rs, VecMask mask = VecMask::No) noexcept; + void VSE64(Vec vs, GPR rs, VecMask mask = VecMask::No) noexcept; + void VSM(Vec vs, GPR rs) noexcept; + + void VSSE8(Vec vs, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VSSE16(Vec vs, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VSSE32(Vec vs, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VSSE64(Vec vs, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + + void VSOXEI8(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSOXEI16(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSOXEI32(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSOXEI64(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VSUXEI8(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSUXEI16(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSUXEI32(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSUXEI64(Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VSSEGE8(uint32_t num_segments, Vec vs, GPR rs, VecMask mask = VecMask::No) noexcept; + void VSSEGE16(uint32_t num_segments, Vec vs, GPR rs, VecMask mask = VecMask::No) noexcept; + void VSSEGE32(uint32_t num_segments, Vec vs, GPR rs, VecMask mask = VecMask::No) noexcept; + void VSSEGE64(uint32_t num_segments, Vec vs, GPR rs, VecMask mask = VecMask::No) noexcept; + + void VSSSEGE8(uint32_t num_segments, Vec vs, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VSSSEGE16(uint32_t num_segments, Vec vs, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VSSSEGE32(uint32_t num_segments, Vec vs, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + void VSSSEGE64(uint32_t num_segments, Vec vs, GPR rs1, GPR rs2, VecMask mask = VecMask::No) noexcept; + + void VSOXSEGEI8(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSOXSEGEI16(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSOXSEGEI32(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSOXSEGEI64(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VSUXSEGEI8(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSUXSEGEI16(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSUXSEGEI32(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + void VSUXSEGEI64(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VSR(uint32_t num_registers, Vec vs, GPR rs) noexcept; + void VS1R(Vec vs, GPR rs) noexcept; + void VS2R(Vec vs, GPR rs) noexcept; + void VS4R(Vec vs, GPR rs) noexcept; + void VS8R(Vec vs, GPR rs) noexcept; + + // Vector Configuration Setting Instructions + + void VSETIVLI(GPR rd, uint32_t imm, SEW sew, LMUL lmul = LMUL::M1, VTA vta = VTA::No, VMA vma = VMA::No) noexcept; + void VSETVL(GPR rd, GPR rs1, GPR rs2) noexcept; + void VSETVLI(GPR rd, GPR rs, SEW sew, LMUL lmul = LMUL::M1, VTA vta = VTA::No, VMA vma = VMA::No) noexcept; + + // Vector Cryptography Instructions + + void VANDN(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VANDN(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VBREV(Vec vd, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VBREV8(Vec 
vd, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VREV8(Vec vd, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VCLZ(Vec vd, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VCTZ(Vec vd, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VCPOP(Vec vd, Vec vs2, VecMask mask = VecMask::No) noexcept; + + void VROL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VROL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VROR(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VROR(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VROR(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VWSLL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VWSLL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + void VWSLL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask = VecMask::No) noexcept; + + void VCLMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VCLMUL(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VCLMULH(Vec vd, Vec vs2, Vec vs1, VecMask mask = VecMask::No) noexcept; + void VCLMULH(Vec vd, Vec vs2, GPR rs1, VecMask mask = VecMask::No) noexcept; + + void VGHSH(Vec vd, Vec vs2, Vec vs1) noexcept; + void VGMUL(Vec vd, Vec vs2) noexcept; + + void VAESDF_VV(Vec vd, Vec vs2) noexcept; + void VAESDF_VS(Vec vd, Vec vs2) noexcept; + + void VAESDM_VV(Vec vd, Vec vs2) noexcept; + void VAESDM_VS(Vec vd, Vec vs2) noexcept; + + void VAESEF_VV(Vec vd, Vec vs2) noexcept; + void VAESEF_VS(Vec vd, Vec vs2) noexcept; + + void VAESEM_VV(Vec vd, Vec vs2) noexcept; + void VAESEM_VS(Vec vd, Vec vs2) noexcept; + + void VAESKF1(Vec vd, Vec vs2, uint32_t uimm) noexcept; + void VAESKF2(Vec vd, Vec vs2, uint32_t uimm) noexcept; + + void VAESZ(Vec vd, Vec vs2) noexcept; + + void VSHA2MS(Vec vd, Vec vs2, Vec vs1) noexcept; + void VSHA2CH(Vec vd, Vec vs2, Vec vs1) noexcept; + void VSHA2CL(Vec vd, Vec vs2, Vec vs1) noexcept; + + void VSM4K(Vec vd, Vec vs2, uint32_t uimm) noexcept; + void VSM4R_VV(Vec vd, Vec vs2) noexcept; + void VSM4R_VS(Vec vd, Vec vs2) noexcept; + + void VSM3C(Vec vd, Vec vs2, uint32_t uimm) noexcept; + void VSM3ME(Vec vd, Vec vs2, Vec vs1) noexcept; + + // Zvfbfmin, Zvfbfwma Extension Instructions + + void VFNCVTBF16_F_F_W(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + void VFWCVTBF16_F_F_V(Vec vd, Vec vs, VecMask mask = VecMask::No) noexcept; + + void VFWMACCBF16(Vec vd, FPR rs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + void VFWMACCBF16(Vec vd, Vec vs1, Vec vs2, VecMask mask = VecMask::No) noexcept; + +private: + // Binds a label to a given offset. + void BindToOffset(Label* label, Label::LocationOffset offset); + + // Links the given label and returns the offset to it. + ptrdiff_t LinkAndGetOffset(Label* label); + + // Resolves all label offsets and patches any necessary + // branch offsets into the branch instructions that + // require them. 
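 + // (Typically invoked once a label with pending references is finally bound.)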
+ void ResolveLabelOffsets(Label* label); + + CodeBuffer m_buffer; + ArchFeature m_features = ArchFeature::RV64; +}; + +} // namespace biscuit diff --git a/include/biscuit/assert.hpp b/include/biscuit/assert.hpp new file mode 100644 index 00000000..f6c5fa97 --- /dev/null +++ b/include/biscuit/assert.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include <cstdio> +#include <cstdlib> + +#define BISCUIT_ASSERT(condition) \ + do { \ + if (!(condition)) { \ + std::printf("Assertion failed (%s)\nin %s, function %s line %i\n", \ + #condition, \ + __FILE__, __func__, __LINE__); \ + std::abort(); \ + } \ + } while (false) diff --git a/include/biscuit/code_buffer.hpp b/include/biscuit/code_buffer.hpp new file mode 100644 index 00000000..46314e48 --- /dev/null +++ b/include/biscuit/code_buffer.hpp @@ -0,0 +1,211 @@ +#pragma once + +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <type_traits> + +#include <biscuit/assert.hpp> + +namespace biscuit { + +/** + * An arbitrarily sized buffer that code is written into. + * + * Also contains other member functions for manipulating + * the data within the code buffer. + */ +class CodeBuffer { +public: + // Default capacity of 4KB. + static constexpr size_t default_capacity = 4096; + + /** + * Constructor + * + * @param capacity The initial capacity of the code buffer in bytes. + */ + explicit CodeBuffer(size_t capacity = default_capacity); + + /** + * Constructor + * + * @param buffer A non-null pointer to an allocated buffer of size `capacity`. + * @param capacity The capacity of the memory pointed to by `buffer`. + * + * @pre The given memory buffer must not be null. + * @pre The given memory buffer must be at minimum `capacity` bytes in size. + * + * @note The caller is responsible for managing the lifetime of the given memory. + * CodeBuffer will *not* free the memory once it goes out of scope. + */ + explicit CodeBuffer(uint8_t* buffer, size_t capacity); + + // Copy constructor and assignment are deleted in order to prevent unintentional memory leaks. + CodeBuffer(const CodeBuffer&) = delete; + CodeBuffer& operator=(const CodeBuffer&) = delete; + + // Move constructing or moving the buffer in general is allowed, as it's a transfer of control. + CodeBuffer(CodeBuffer&& other) noexcept; + CodeBuffer& operator=(CodeBuffer&& other) noexcept; + + /** + * Destructor + * + * If a custom memory buffer is not given to the code buffer, + * then the code buffer will automatically free any memory + * it had allocated in order to be able to emit code. + */ + ~CodeBuffer() noexcept; + + /// Returns whether or not the memory is managed by the code buffer. + [[nodiscard]] bool IsManaged() const noexcept { return m_is_managed; } + + /// Retrieves the current cursor position within the buffer. + [[nodiscard]] ptrdiff_t GetCursorOffset() const noexcept { + return m_cursor - m_buffer; + } + + /// Retrieves the current address of the cursor within the buffer. + [[nodiscard]] uintptr_t GetCursorAddress() const noexcept { + return GetOffsetAddress(GetCursorOffset()); + } + + /// Retrieves the cursor pointer. + [[nodiscard]] uint8_t* GetCursorPointer() noexcept { + return GetOffsetPointer(GetCursorOffset()); + } + + /// Retrieves the cursor pointer. + [[nodiscard]] const uint8_t* GetCursorPointer() const noexcept { + return GetOffsetPointer(GetCursorOffset()); + } + + /// Retrieves the address of an arbitrary offset within the buffer. + [[nodiscard]] uintptr_t GetOffsetAddress(ptrdiff_t offset) const noexcept { + return reinterpret_cast<uintptr_t>(GetOffsetPointer(offset)); + } + + /// Retrieves the pointer to an arbitrary location within the buffer. 
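 + /// @pre `offset` must lie within [0, GetCursorOffset()]; out-of-range offsets trip BISCUIT_ASSERT.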
+ [[nodiscard]] uint8_t* GetOffsetPointer(ptrdiff_t offset) noexcept { + BISCUIT_ASSERT(offset >= 0 && offset <= GetCursorOffset()); + return m_buffer + offset; + } + + /// Retrieves the pointer to an arbitrary location within the buffer. + [[nodiscard]] const uint8_t* GetOffsetPointer(ptrdiff_t offset) const noexcept { + BISCUIT_ASSERT(offset >= 0 && offset <= GetCursorOffset()); + return m_buffer + offset; + } + + /** + * Allows rewinding of the code buffer cursor. + * + * @param offset The offset to rewind the cursor by. + * + * @note If no offset is provided, then this function rewinds the + * cursor to the beginning of the buffer. + * + * @note The offset may not be larger than the current cursor offset + * and may not rewind to before the start of the buffer. + */ + void RewindCursor(ptrdiff_t offset = 0) noexcept { + auto* rewound = m_buffer + offset; + BISCUIT_ASSERT(m_buffer <= rewound && rewound <= m_cursor); + m_cursor = rewound; + } + + /** + * Whether or not the underlying buffer has enough room for the + * given number of bytes. + * + * @param num_bytes The number of bytes to store in the buffer. + */ + [[nodiscard]] bool HasSpaceFor(size_t num_bytes) const noexcept { + return GetRemainingBytes() >= num_bytes; + } + + /// Returns the size of the data written to the buffer in bytes. + [[nodiscard]] size_t GetSizeInBytes() const noexcept { + EnsureBufferRange(); + return static_cast<size_t>(m_cursor - m_buffer); + } + + /// Returns the total number of remaining bytes in the buffer. + [[nodiscard]] size_t GetRemainingBytes() const noexcept { + EnsureBufferRange(); + return static_cast<size_t>((m_buffer + m_capacity) - m_cursor); + } + + /** + * Grows the underlying memory of the code buffer. + * + * @param new_capacity The new capacity of the code buffer in bytes. + * + * @pre The underlying memory of the code buffer *must* be managed + * by the code buffer itself. Attempts to grow the buffer + * with memory that is not managed by it will result in + * an assertion being hit. + * + * @note Calling this with a new capacity that is less than or equal + * to the current capacity of the buffer will result in + * this function doing nothing. + */ + void Grow(size_t new_capacity); + + /** + * Emits a given value into the code buffer. + * + * @param value The value to emit into the code buffer. + * @tparam T A trivially-copyable type. + */ + template <typename T> + void Emit(T value) noexcept { + static_assert(std::is_trivially_copyable_v<T>, + "It's undefined behavior to memcpy a non-trivially-copyable type."); + BISCUIT_ASSERT(HasSpaceFor(sizeof(T))); + + std::memcpy(m_cursor, &value, sizeof(T)); + m_cursor += sizeof(T); + } + + /// Emits a 16-bit value into the code buffer. + void Emit16(uint32_t value) noexcept { + Emit(static_cast<uint16_t>(value)); + } + + /// Emits a 32-bit value into the code buffer. + void Emit32(uint32_t value) noexcept { + Emit(value); + } + + /** + * Sets the internal code buffer to be executable. + * + * @note This will make the contained region of memory non-writable + * to satisfy operating under W^X contexts. To make the + * region writable again, use SetWritable(). + */ + void SetExecutable(); + + /** + * Sets the internal code buffer to be writable. + * + * @note This will make the contained region of memory non-executable + * to satisfy operating under W^X contexts. To make the region + * executable again, use SetExecutable(). 
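 + * + * A minimal W^X usage cycle might look as follows (an illustrative sketch, + * not upstream documentation): + * @code{.cpp} + * CodeBuffer buffer{}; // writable on construction + * // ... emit code into the buffer ... + * buffer.SetExecutable(); // flip to executable before running the code + * // ... run the generated code ... + * buffer.SetWritable(); // flip back before emitting or patching again + * @endcode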
+ */ + void SetWritable(); + +private: + void EnsureBufferRange() const noexcept { + BISCUIT_ASSERT(m_cursor >= m_buffer && m_cursor <= m_buffer + m_capacity); + } + + uint8_t* m_buffer = nullptr; + uint8_t* m_cursor = nullptr; + size_t m_capacity = 0; + bool m_is_managed = false; +}; + +} // namespace biscuit diff --git a/include/biscuit/cpuinfo.hpp b/include/biscuit/cpuinfo.hpp new file mode 100644 index 00000000..b5efa739 --- /dev/null +++ b/include/biscuit/cpuinfo.hpp @@ -0,0 +1,101 @@ +// Copyright (c), 2022, KNS Group LLC (YADRO) +// +// Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +#pragma once + +#include <biscuit/assembler.hpp> +#include <biscuit/registers.hpp> +#include <cstddef> +#include <cstdint> + +#if defined(__linux__) && defined(__riscv) +#include <asm/hwcap.h> +#include <sys/auxv.h> +#include <sys/syscall.h> +#endif + +namespace biscuit { + +#ifndef COMPAT_HWCAP_ISA_I +#define COMPAT_HWCAP_ISA_I (1U << ('I' - 'A')) +#endif + +#ifndef COMPAT_HWCAP_ISA_M +#define COMPAT_HWCAP_ISA_M (1U << ('M' - 'A')) +#endif + +#ifndef COMPAT_HWCAP_ISA_A +#define COMPAT_HWCAP_ISA_A (1U << ('A' - 'A')) +#endif + +#ifndef COMPAT_HWCAP_ISA_F +#define COMPAT_HWCAP_ISA_F (1U << ('F' - 'A')) +#endif + +#ifndef COMPAT_HWCAP_ISA_D +#define COMPAT_HWCAP_ISA_D (1U << ('D' - 'A')) +#endif + +#ifndef COMPAT_HWCAP_ISA_C +#define COMPAT_HWCAP_ISA_C (1U << ('C' - 'A')) +#endif + +#ifndef COMPAT_HWCAP_ISA_V +#define COMPAT_HWCAP_ISA_V (1U << ('V' - 'A')) +#endif + +enum class RISCVExtension : uint64_t { + I = COMPAT_HWCAP_ISA_I, + M = COMPAT_HWCAP_ISA_M, + A = COMPAT_HWCAP_ISA_A, + F = COMPAT_HWCAP_ISA_F, + D = COMPAT_HWCAP_ISA_D, + C = COMPAT_HWCAP_ISA_C, + V = COMPAT_HWCAP_ISA_V +}; + +template <CSR csr> +struct CSRReader : public biscuit::Assembler { + // Buffer capacity exactly for 2 instructions. + static constexpr size_t capacity = 8; + + CSRReader() : biscuit::Assembler{CSRReader::capacity} { + CSRR(a0, csr); + RET(); + } + + // Copy constructor and assignment are deleted. + CSRReader(const CSRReader&) = delete; + CSRReader& operator=(const CSRReader&) = delete; + + // Move constructor and assignment are defaulted. + CSRReader(CSRReader&&) = default; + CSRReader& operator=(CSRReader&&) = default; + + template <typename CSRReaderFunc> + CSRReaderFunc GetCode() { + this->GetCodeBuffer().SetExecutable(); + return reinterpret_cast<CSRReaderFunc>(this->GetBufferPointer(0)); + } +}; + +/** + * Class that detects information about a RISC-V CPU. + */ +class CPUInfo { +public: + /** + * Checks if a particular RISC-V extension is available. + * + * @param extension The extension to check. + */ + bool Has(RISCVExtension extension) const; + + /// Returns the vector register length in bytes. 
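 + /// + /// A short usage sketch (illustrative only; assumes a RISC-V Linux host): + /// @code{.cpp} + /// CPUInfo cpu; + /// if (cpu.Has(RISCVExtension::V)) { + /// const uint32_t vlenb = cpu.GetVlenb(); // VLEN in bytes + /// } + /// @endcode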
+ uint32_t GetVlenb() const; +}; + +} // namespace biscuit diff --git a/include/biscuit/csr.hpp b/include/biscuit/csr.hpp new file mode 100644 index 00000000..e31243ea --- /dev/null +++ b/include/biscuit/csr.hpp @@ -0,0 +1,443 @@ +#pragma once + +#include <cstdint> + +namespace biscuit { + +// Control and Status Register +enum class CSR : uint32_t { + // clang-format off + + // User-level CSRs + + UStatus = 0x000, // User status register + UIE = 0x004, // User interrupt-enable register + UTVEC = 0x005, // User trap handler base address + UScratch = 0x040, // Scratch register for user trap handlers + UEPC = 0x041, // User exception program counter + UCause = 0x042, // User trap cause + UTVal = 0x043, // User bad address or instruction + UIP = 0x044, // User interrupt pending + + FFlags = 0x001, // Floating-point Accrued Exceptions + FRM = 0x002, // Floating-point Dynamic Rounding Mode + FCSR = 0x003, // Floating-point Control and Status Register (frm + fflags) + + JVT = 0x017, // Table jump base vector and control register + + Cycle = 0xC00, // Cycle counter for RDCYCLE instruction. + Time = 0xC01, // Timer for RDTIME instruction. + InstRet = 0xC02, // Instructions retired counter for RDINSTRET instruction. + HPMCounter3 = 0xC03, // Performance-monitoring counter. + HPMCounter4 = 0xC04, // Performance-monitoring counter. + HPMCounter5 = 0xC05, // Performance-monitoring counter. + HPMCounter6 = 0xC06, // Performance-monitoring counter. + HPMCounter7 = 0xC07, // Performance-monitoring counter. + HPMCounter8 = 0xC08, // Performance-monitoring counter. + HPMCounter9 = 0xC09, // Performance-monitoring counter. + HPMCounter10 = 0xC0A, // Performance-monitoring counter. + HPMCounter11 = 0xC0B, // Performance-monitoring counter. + HPMCounter12 = 0xC0C, // Performance-monitoring counter. + HPMCounter13 = 0xC0D, // Performance-monitoring counter. + HPMCounter14 = 0xC0E, // Performance-monitoring counter. + HPMCounter15 = 0xC0F, // Performance-monitoring counter. + HPMCounter16 = 0xC10, // Performance-monitoring counter. + HPMCounter17 = 0xC11, // Performance-monitoring counter. + HPMCounter18 = 0xC12, // Performance-monitoring counter. + HPMCounter19 = 0xC13, // Performance-monitoring counter. + HPMCounter20 = 0xC14, // Performance-monitoring counter. + HPMCounter21 = 0xC15, // Performance-monitoring counter. + HPMCounter22 = 0xC16, // Performance-monitoring counter. + HPMCounter23 = 0xC17, // Performance-monitoring counter. + HPMCounter24 = 0xC18, // Performance-monitoring counter. + HPMCounter25 = 0xC19, // Performance-monitoring counter. + HPMCounter26 = 0xC1A, // Performance-monitoring counter. + HPMCounter27 = 0xC1B, // Performance-monitoring counter. + HPMCounter28 = 0xC1C, // Performance-monitoring counter. + HPMCounter29 = 0xC1D, // Performance-monitoring counter. + HPMCounter30 = 0xC1E, // Performance-monitoring counter. + HPMCounter31 = 0xC1F, // Performance-monitoring counter. + CycleH = 0xC80, // Upper 32 bits of cycle, RV32I only. + TimeH = 0xC81, // Upper 32 bits of time, RV32I only. + InstRetH = 0xC82, // Upper 32 bits of instret, RV32I only. + HPMCounter3H = 0xC83, // Upper 32 bits of HPMCounter3, RV32I only. + HPMCounter4H = 0xC84, // Upper 32 bits of HPMCounter4, RV32I only. + HPMCounter5H = 0xC85, // Upper 32 bits of HPMCounter5, RV32I only. + HPMCounter6H = 0xC86, // Upper 32 bits of HPMCounter6, RV32I only. + HPMCounter7H = 0xC87, // Upper 32 bits of HPMCounter7, RV32I only. + HPMCounter8H = 0xC88, // Upper 32 bits of HPMCounter8, RV32I only. 
+ HPMCounter9H = 0xC89, // Upper 32 bits of HPMCounter9, RV32I only. + HPMCounter10H = 0xC8A, // Upper 32 bits of HPMCounter10, RV32I only. + HPMCounter11H = 0xC8B, // Upper 32 bits of HPMCounter11, RV32I only. + HPMCounter12H = 0xC8C, // Upper 32 bits of HPMCounter12, RV32I only. + HPMCounter13H = 0xC8D, // Upper 32 bits of HPMCounter13, RV32I only. + HPMCounter14H = 0xC8E, // Upper 32 bits of HPMCounter14, RV32I only. + HPMCounter15H = 0xC8F, // Upper 32 bits of HPMCounter15, RV32I only. + HPMCounter16H = 0xC90, // Upper 32 bits of HPMCounter16, RV32I only. + HPMCounter17H = 0xC91, // Upper 32 bits of HPMCounter17, RV32I only. + HPMCounter18H = 0xC92, // Upper 32 bits of HPMCounter18, RV32I only. + HPMCounter19H = 0xC93, // Upper 32 bits of HPMCounter19, RV32I only. + HPMCounter20H = 0xC94, // Upper 32 bits of HPMCounter20, RV32I only. + HPMCounter21H = 0xC95, // Upper 32 bits of HPMCounter21, RV32I only. + HPMCounter22H = 0xC96, // Upper 32 bits of HPMCounter22, RV32I only. + HPMCounter23H = 0xC97, // Upper 32 bits of HPMCounter23, RV32I only. + HPMCounter24H = 0xC98, // Upper 32 bits of HPMCounter24, RV32I only. + HPMCounter25H = 0xC99, // Upper 32 bits of HPMCounter25, RV32I only. + HPMCounter26H = 0xC9A, // Upper 32 bits of HPMCounter26, RV32I only. + HPMCounter27H = 0xC9B, // Upper 32 bits of HPMCounter27, RV32I only. + HPMCounter28H = 0xC9C, // Upper 32 bits of HPMCounter28, RV32I only. + HPMCounter29H = 0xC9D, // Upper 32 bits of HPMCounter29, RV32I only. + HPMCounter30H = 0xC9E, // Upper 32 bits of HPMCounter30, RV32I only. + HPMCounter31H = 0xC9F, // Upper 32 bits of HPMCounter31, RV32I only. + + // Supervisor-level CSRs + + SStatus = 0x100, // Supervisor status register + SEDeleg = 0x102, // Supervisor exception delegation register + SIDeleg = 0x103, // Supervisor interrupt delegation register + SIE = 0x104, // Supervisor interrupt-enable register + STVec = 0x105, // Supervisor trap handler base address + SCounterEn = 0x106, // Supervisor counter enable + + SEnvCfg = 0x10A, // Supervisor environment configuration register + + SScratch = 0x140, // Scratch register for supervisor trap handlers + SEPC = 0x141, // Supervisor exception program counter + SCause = 0x142, // Supervisor trap cause + STVal = 0x143, // Supervisor bad address or instruction + SIP = 0x144, // Supervisor interrupt pending. 
+ + SISelect = 0x150, // Supervisor indirect register select + SIReg = 0x151, // Supervisor indirect register alias + + StopEI = 0x15C, // Supervisor top external interrupt (only with an IMSIC) + StopI = 0xDB0, // Supervisor top interrupt + + SIEH = 0x114, // Upper 32 bits of sie + SIPH = 0x154, // Upper 32 bits of sip + + STimeCmp = 0x14D, // Supervisor timer register + STimeCmpH = 0x15D, // Supervisor timer register, RV32 only + + SATP = 0x180, // Supervisor address translation and protection + + SContext = 0x5A8, // Supervisor-mode context register + + // Hypervisor-level CSRs + + HStatus = 0x600, // Hypervisor status register + HEDeleg = 0x602, // Hypervisor exception delegation register + HIDeleg = 0x603, // Hypervisor interrupt delegation register + HIE = 0x604, // Hypervisor interrupt-enable register + HCounterEn = 0x606, // Hypervisor counter enable + HGEIE = 0x607, // Hypervisor guest external interrupt-enable register + HVIEN = 0x608, // Hypervisor virtual interrupt enables + HVICTL = 0x609, // Hypervisor virtual interrupt control + + HIDelegH = 0x613, // Upper 32 bits of hideleg + HVIENH = 0x618, // Upper 32 bits of hvien + HVIPH = 0x655, // Upper 32 bits of hvip + HVIPrio1H = 0x656, // Upper 32 bits of hviprio1 + HVIPrio2H = 0x657, // Upper 32 bits of hviprio2 + VSIEH = 0x214, // Upper 32 bits of vsie + VSIPH = 0x254, // Upper 32 bits of vsip + + HTVal = 0x643, // Hypervisor bad guest physical address + HIP = 0x644, // Hypervisor interrupt pending + HVIP = 0x645, // Hypervisor virtual interrupt pending + HVIPrio1 = 0x646, // Hypervisor VS-level interrupt priorities + HVIPrio2 = 0x647, // Hypervisor VS-level interrupt priorities + HTInst = 0x64A, // Hypervisor trap instruction (transformed) + HGEIP = 0xE12, // Hypervisor guest external interrupt pending + + HEnvCfg = 0x60A, // Hypervisor environment configuration register + HEnvCfgH = 0x61A, // Additional hypervisor environment configuration register, RV32 only + + HGATP = 0x680, // Hypervisor guest address translation and protection + + HContext = 0x6A8, // Hypervisor-mode context register + + HTimeDelta = 0x605, // Delta for VS/VU-mode timer + HTimeDeltaH = 0x615, // Upper 32 bits of HTimeDelta, HSXLEN=32 only + + VSStatus = 0x200, // Virtual supervisor status register + VSIE = 0x204, // Virtual supervisor interrupt-enable register + VSTVec = 0x205, // Virtual supervisor trap handler base address + VSScratch = 0x240, // Virtual supervisor scratch register + VSEPC = 0x241, // Virtual supervisor exception program counter + VSCause = 0x242, // Virtual supervisor trap cause + VSTVal = 0x243, // Virtual supervisor bad address or instruction + VSIP = 0x244, // Virtual supervisor interrupt pending + + VSISelect = 0x250, // Virtual supervisor indirect register select + VSIReg = 0x251, // Virtual supervisor indirect register alias + + VStopEI = 0x25C, // Virtual supervisor top external interrupt (only with an IMSIC) + VStopI = 0xEB0, // Virtual supervisor top interrupt + + VSTimeCmp = 0x24D, // Virtual supervisor timer register + VSTimeCmpH = 0x25D, // Virtual supervisor timer register, RV32 only + + VSATP = 0x280, // Virtual supervisor address translation and protection + + // Machine-level CSRs + + MVendorID = 0xF11, // Vendor ID + MArchID = 0xF12, // Architecture ID + MImpID = 0xF13, // Implementation ID + MHartID = 0xF14, // Hardware Thread ID + MConfigPtr = 0xF15, // Pointer to configuration data structure + + MStatus = 0x300, // Machine status register + MISA = 0x301, // ISA and extensions + MEDeleg = 0x302, // Machine 
exception delegation register + MIDeleg = 0x303, // Machine interrupt delegation register + MIE = 0x304, // Machine interrupt-enable register + MTVec = 0x305, // Machine trap-handler base address + MCounterEn = 0x306, // Machine counter enable + MVIEN = 0x308, // Machine virtual interrupt enables + MVIP = 0x309, // Machine virtual interrupt-pending bits + MStatusH = 0x310, // Additional machine status register, RV32 only + + MIDelegH = 0x313, // Upper 32 bits of mideleg (only with S-mode) + MIEH = 0x314, // Upper 32 bits of mie + MVIENH = 0x318, // Upper 32 bits of mvien (only with S-mode) + MVIPH = 0x319, // Upper 32 bits of mvip (only with S-mode) + MIPH = 0x354, // Upper 32 bits of mip + + MScratch = 0x340, // Scratch register for machine trap handlers + MEPC = 0x341, // Machine exception program counter + MCause = 0x342, // Machine trap cause + MTVal = 0x343, // Machine bad address or instruction + MIP = 0x344, // Machine interrupt pending + MTInst = 0x34A, // Machine trap instruction (transformed) + MTVal2 = 0x34B, // Machine bad guest physical address + + MISelect = 0x350, // Machine indirect register select + MIReg = 0x351, // Machine indirect register alias + + MTopEI = 0x35C, // Machine top external interrupt (only with an IMSIC) + MTopI = 0xFB0, // Machine top interrupt + + MEnvCfg = 0x30A, // Machine environment configuration register + MEnvCfgH = 0x31A, // Additional machine environment configuration register, RV32 only + MSecCfg = 0x747, // Machine security configuration register + MSecCfgH = 0x757, // Additional machine security configuration register, RV32 only + + PMPCfg0 = 0x3A0, // Physical memory protection configuration + PMPCfg1 = 0x3A1, // Physical memory protection configuration, RV32 only + PMPCfg2 = 0x3A2, // Physical memory protection configuration + PMPCfg3 = 0x3A3, // Physical memory protection configuration, RV32 only + PMPCfg4 = 0x3A4, // Physical memory protection configuration + PMPCfg5 = 0x3A5, // Physical memory protection configuration, RV32 only + PMPCfg6 = 0x3A6, // Physical memory protection configuration + PMPCfg7 = 0x3A7, // Physical memory protection configuration, RV32 only + PMPCfg8 = 0x3A8, // Physical memory protection configuration + PMPCfg9 = 0x3A9, // Physical memory protection configuration, RV32 only + PMPCfg10 = 0x3AA, // Physical memory protection configuration + PMPCfg11 = 0x3AB, // Physical memory protection configuration, RV32 only + PMPCfg12 = 0x3AC, // Physical memory protection configuration + PMPCfg13 = 0x3AD, // Physical memory protection configuration, RV32 only + PMPCfg14 = 0x3AE, // Physical memory protection configuration + PMPCfg15 = 0x3AF, // Physical memory protection configuration, RV32 only + PMPAddr0 = 0x3B0, // Physical memory protection address register + PMPAddr1 = 0x3B1, // Physical memory protection address register + PMPAddr2 = 0x3B2, // Physical memory protection address register + PMPAddr3 = 0x3B3, // Physical memory protection address register + PMPAddr4 = 0x3B4, // Physical memory protection address register + PMPAddr5 = 0x3B5, // Physical memory protection address register + PMPAddr6 = 0x3B6, // Physical memory protection address register + PMPAddr7 = 0x3B7, // Physical memory protection address register + PMPAddr8 = 0x3B8, // Physical memory protection address register + PMPAddr9 = 0x3B9, // Physical memory protection address register + PMPAddr10 = 0x3BA, // Physical memory protection address register + PMPAddr11 = 0x3BB, // Physical memory protection address register + PMPAddr12 = 0x3BC, // Physical 
memory protection address register + PMPAddr13 = 0x3BD, // Physical memory protection address register + PMPAddr14 = 0x3BE, // Physical memory protection address register + PMPAddr15 = 0x3BF, // Physical memory protection address register + PMPAddr16 = 0x3C0, // Physical memory protection address register + PMPAddr17 = 0x3C1, // Physical memory protection address register + PMPAddr18 = 0x3C2, // Physical memory protection address register + PMPAddr19 = 0x3C3, // Physical memory protection address register + PMPAddr20 = 0x3C4, // Physical memory protection address register + PMPAddr21 = 0x3C5, // Physical memory protection address register + PMPAddr22 = 0x3C6, // Physical memory protection address register + PMPAddr23 = 0x3C7, // Physical memory protection address register + PMPAddr24 = 0x3C8, // Physical memory protection address register + PMPAddr25 = 0x3C9, // Physical memory protection address register + PMPAddr26 = 0x3CA, // Physical memory protection address register + PMPAddr27 = 0x3CB, // Physical memory protection address register + PMPAddr28 = 0x3CC, // Physical memory protection address register + PMPAddr29 = 0x3CD, // Physical memory protection address register + PMPAddr30 = 0x3CE, // Physical memory protection address register + PMPAddr31 = 0x3CF, // Physical memory protection address register + PMPAddr32 = 0x3D0, // Physical memory protection address register + PMPAddr33 = 0x3D1, // Physical memory protection address register + PMPAddr34 = 0x3D2, // Physical memory protection address register + PMPAddr35 = 0x3D3, // Physical memory protection address register + PMPAddr36 = 0x3D4, // Physical memory protection address register + PMPAddr37 = 0x3D5, // Physical memory protection address register + PMPAddr38 = 0x3D6, // Physical memory protection address register + PMPAddr39 = 0x3D7, // Physical memory protection address register + PMPAddr40 = 0x3D8, // Physical memory protection address register + PMPAddr41 = 0x3D9, // Physical memory protection address register + PMPAddr42 = 0x3DA, // Physical memory protection address register + PMPAddr43 = 0x3DB, // Physical memory protection address register + PMPAddr44 = 0x3DC, // Physical memory protection address register + PMPAddr45 = 0x3DD, // Physical memory protection address register + PMPAddr46 = 0x3DE, // Physical memory protection address register + PMPAddr47 = 0x3DF, // Physical memory protection address register + PMPAddr48 = 0x3E0, // Physical memory protection address register + PMPAddr49 = 0x3E1, // Physical memory protection address register + PMPAddr50 = 0x3E2, // Physical memory protection address register + PMPAddr51 = 0x3E3, // Physical memory protection address register + PMPAddr52 = 0x3E4, // Physical memory protection address register + PMPAddr53 = 0x3E5, // Physical memory protection address register + PMPAddr54 = 0x3E6, // Physical memory protection address register + PMPAddr55 = 0x3E7, // Physical memory protection address register + PMPAddr56 = 0x3E8, // Physical memory protection address register + PMPAddr57 = 0x3E9, // Physical memory protection address register + PMPAddr58 = 0x3EA, // Physical memory protection address register + PMPAddr59 = 0x3EB, // Physical memory protection address register + PMPAddr60 = 0x3EC, // Physical memory protection address register + PMPAddr61 = 0x3ED, // Physical memory protection address register + PMPAddr62 = 0x3EE, // Physical memory protection address register + PMPAddr63 = 0x3EF, // Physical memory protection address register + + MNScratch = 0x740, // Resumable NMI scratch 
register + MNEPC = 0x741, // Resumable NMI program counter + MNCause = 0x742, // Resumable NMI cause + MNStatus = 0x744, // Resumable NMI status + + MCycle = 0xB00, // Machine cycle counter + MInstRet = 0xB02, // Machine instructions-retired counter + MHPMCounter3 = 0xB03, // Machine performance-monitoring counter + MHPMCounter4 = 0xB04, // Machine performance-monitoring counter + MHPMCounter5 = 0xB05, // Machine performance-monitoring counter + MHPMCounter6 = 0xB06, // Machine performance-monitoring counter + MHPMCounter7 = 0xB07, // Machine performance-monitoring counter + MHPMCounter8 = 0xB08, // Machine performance-monitoring counter + MHPMCounter9 = 0xB09, // Machine performance-monitoring counter + MHPMCounter10 = 0xB0A, // Machine performance-monitoring counter + MHPMCounter11 = 0xB0B, // Machine performance-monitoring counter + MHPMCounter12 = 0xB0C, // Machine performance-monitoring counter + MHPMCounter13 = 0xB0D, // Machine performance-monitoring counter + MHPMCounter14 = 0xB0E, // Machine performance-monitoring counter + MHPMCounter15 = 0xB0F, // Machine performance-monitoring counter + MHPMCounter16 = 0xB10, // Machine performance-monitoring counter + MHPMCounter17 = 0xB11, // Machine performance-monitoring counter + MHPMCounter18 = 0xB12, // Machine performance-monitoring counter + MHPMCounter19 = 0xB13, // Machine performance-monitoring counter + MHPMCounter20 = 0xB14, // Machine performance-monitoring counter + MHPMCounter21 = 0xB15, // Machine performance-monitoring counter + MHPMCounter22 = 0xB16, // Machine performance-monitoring counter + MHPMCounter23 = 0xB17, // Machine performance-monitoring counter + MHPMCounter24 = 0xB18, // Machine performance-monitoring counter + MHPMCounter25 = 0xB19, // Machine performance-monitoring counter + MHPMCounter26 = 0xB1A, // Machine performance-monitoring counter + MHPMCounter27 = 0xB1B, // Machine performance-monitoring counter + MHPMCounter28 = 0xB1C, // Machine performance-monitoring counter + MHPMCounter29 = 0xB1D, // Machine performance-monitoring counter + MHPMCounter30 = 0xB1E, // Machine performance-monitoring counter + MHPMCounter31 = 0xB1F, // Machine performance-monitoring counter + + MCycleH = 0xB80, // Upper 32 bits of mcycle, RV32I only + MInstRetH = 0xB82, // Upper 32 bits of minstret, RV32I only + + MHPMCounter3H = 0xB83, // Upper 32 bits of MHPMCounter3, RV32I only + MHPMCounter4H = 0xB84, // Upper 32 bits of MHPMCounter4, RV32I only + MHPMCounter5H = 0xB85, // Upper 32 bits of MHPMCounter5, RV32I only + MHPMCounter6H = 0xB86, // Upper 32 bits of MHPMCounter6, RV32I only + MHPMCounter7H = 0xB87, // Upper 32 bits of MHPMCounter7, RV32I only + MHPMCounter8H = 0xB88, // Upper 32 bits of MHPMCounter8, RV32I only + MHPMCounter9H = 0xB89, // Upper 32 bits of MHPMCounter9, RV32I only + MHPMCounter10H = 0xB8A, // Upper 32 bits of MHPMCounter10, RV32I only + MHPMCounter11H = 0xB8B, // Upper 32 bits of MHPMCounter11, RV32I only + MHPMCounter12H = 0xB8C, // Upper 32 bits of MHPMCounter12, RV32I only + MHPMCounter13H = 0xB8D, // Upper 32 bits of MHPMCounter13, RV32I only + MHPMCounter14H = 0xB8E, // Upper 32 bits of MHPMCounter14, RV32I only + MHPMCounter15H = 0xB8F, // Upper 32 bits of MHPMCounter15, RV32I only + MHPMCounter16H = 0xB90, // Upper 32 bits of MHPMCounter16, RV32I only + MHPMCounter17H = 0xB91, // Upper 32 bits of MHPMCounter17, RV32I only + MHPMCounter18H = 0xB92, // Upper 32 bits of MHPMCounter18, RV32I only + MHPMCounter19H = 0xB93, // Upper 32 bits of MHPMCounter19, RV32I only + MHPMCounter20H = 0xB94, // Upper 
32 bits of MHPMCounter20, RV32I only + MHPMCounter21H = 0xB95, // Upper 32 bits of MHPMCounter21, RV32I only + MHPMCounter22H = 0xB96, // Upper 32 bits of MHPMCounter22, RV32I only + MHPMCounter23H = 0xB97, // Upper 32 bits of MHPMCounter23, RV32I only + MHPMCounter24H = 0xB98, // Upper 32 bits of MHPMCounter24, RV32I only + MHPMCounter25H = 0xB99, // Upper 32 bits of MHPMCounter25, RV32I only + MHPMCounter26H = 0xB9A, // Upper 32 bits of MHPMCounter26, RV32I only + MHPMCounter27H = 0xB9B, // Upper 32 bits of MHPMCounter27, RV32I only + MHPMCounter28H = 0xB9C, // Upper 32 bits of MHPMCounter28, RV32I only + MHPMCounter29H = 0xB9D, // Upper 32 bits of MHPMCounter29, RV32I only + MHPMCounter30H = 0xB9E, // Upper 32 bits of MHPMCounter30, RV32I only + MHPMCounter31H = 0xB9F, // Upper 32 bits of MHPMCounter31, RV32I only + + MCountInhibit = 0x320, // Machine counter-inhibit register + + MCycleCfg = 0x321, // Privilege mode filtering for cycle counter + MCycleCfgH = 0x721, // Privilege mode filtering for cycle counter (RV32) + MInstRetCfg = 0x322, // Privilege mode filtering for instret counters + MInstRetCfgH = 0x722, // Privilege mode filtering for instret counters (RV32) + + MHPMEvent3 = 0x323, // Machine performance-monitoring event selector + MHPMEvent4 = 0x324, // Machine performance-monitoring event selector + MHPMEvent5 = 0x325, // Machine performance-monitoring event selector + MHPMEvent6 = 0x326, // Machine performance-monitoring event selector + MHPMEvent7 = 0x327, // Machine performance-monitoring event selector + MHPMEvent8 = 0x328, // Machine performance-monitoring event selector + MHPMEvent9 = 0x329, // Machine performance-monitoring event selector + MHPMEvent10 = 0x32A, // Machine performance-monitoring event selector + MHPMEvent11 = 0x32B, // Machine performance-monitoring event selector + MHPMEvent12 = 0x32C, // Machine performance-monitoring event selector + MHPMEvent13 = 0x32D, // Machine performance-monitoring event selector + MHPMEvent14 = 0x32E, // Machine performance-monitoring event selector + MHPMEvent15 = 0x32F, // Machine performance-monitoring event selector + MHPMEvent16 = 0x330, // Machine performance-monitoring event selector + MHPMEvent17 = 0x331, // Machine performance-monitoring event selector + MHPMEvent18 = 0x332, // Machine performance-monitoring event selector + MHPMEvent19 = 0x333, // Machine performance-monitoring event selector + MHPMEvent20 = 0x334, // Machine performance-monitoring event selector + MHPMEvent21 = 0x335, // Machine performance-monitoring event selector + MHPMEvent22 = 0x336, // Machine performance-monitoring event selector + MHPMEvent23 = 0x337, // Machine performance-monitoring event selector + MHPMEvent24 = 0x338, // Machine performance-monitoring event selector + MHPMEvent25 = 0x339, // Machine performance-monitoring event selector + MHPMEvent26 = 0x33A, // Machine performance-monitoring event selector + MHPMEvent27 = 0x33B, // Machine performance-monitoring event selector + MHPMEvent28 = 0x33C, // Machine performance-monitoring event selector + MHPMEvent29 = 0x33D, // Machine performance-monitoring event selector + MHPMEvent30 = 0x33E, // Machine performance-monitoring event selector + MHPMEvent31 = 0x33F, // Machine performance-monitoring event selector + + TSelect = 0x7A0, // Debug/Trace trigger register select + TData1 = 0x7A1, // First Debug/Trace trigger data register + TData2 = 0x7A2, // Second Debug/Trace trigger data register + TData3 = 0x7A3, // Third Debug/Trace trigger data register + MContext = 0x7A8, // Machine-mode 
context register
+
+    DCSR = 0x7B0, // Debug control and status register
+    DPC = 0x7B1, // Debug PC
+    DScratch0 = 0x7B2, // Debug scratch register 0
+    DScratch1 = 0x7B3, // Debug scratch register 1
+
+    // Scalar Cryptography Entropy Source Extension CSRs
+
+    Seed = 0x015, // Entropy bit provider (up to 16 bits)
+
+    // Vector Extension CSRs
+
+    VStart = 0x008, // Vector start position
+    VXSat = 0x009, // Fixed-Point Saturate Flag
+    VXRM = 0x00A, // Fixed-Point Rounding Mode
+    VCSR = 0x00F, // Vector control and status register
+    VL = 0xC20, // Vector length
+    VType = 0xC21, // Vector data type register
+    VLenb = 0xC22, // Vector register length in bytes
+
+    // clang-format on
+};
+
+} // namespace biscuit
diff --git a/include/biscuit/isa.hpp b/include/biscuit/isa.hpp
new file mode 100644
index 00000000..94a9c239
--- /dev/null
+++ b/include/biscuit/isa.hpp
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <cstdint>
+
+// Source file for general values and data structures
+// that don't fit a particular criterion related to the ISA.
+
+namespace biscuit {
+
+enum class FenceOrder : uint32_t {
+    W = 1, // Write
+    R = 2, // Read
+    O = 4, // Device Output
+    I = 8, // Device Input
+
+    RW = R | W,
+
+    IO = I | O,
+    IR = I | R,
+    IW = I | W,
+    IRW = I | R | W,
+
+    OI = O | I,
+    OR = O | R,
+    OW = O | W,
+    ORW = O | R | W,
+
+    IORW = I | O | R | W,
+};
+
+// Atomic ordering
+enum class Ordering : uint32_t {
+    None = 0, // None
+    RL = 1, // Release
+    AQ = 2, // Acquire
+    AQRL = AQ | RL, // Acquire-Release
+};
+
+// Floating-point Rounding Mode
+enum class RMode : uint32_t {
+    RNE = 0b000, // Round to Nearest, ties to Even
+    RTZ = 0b001, // Round towards Zero
+    RDN = 0b010, // Round Down (towards negative infinity)
+    RUP = 0b011, // Round Up (towards positive infinity)
+    RMM = 0b100, // Round to Nearest, ties to Max Magnitude
+    DYN = 0b111, // Dynamic Rounding Mode
+};
+
+} // namespace biscuit
diff --git a/include/biscuit/label.hpp b/include/biscuit/label.hpp
new file mode 100644
index 00000000..8cfeeb2c
--- /dev/null
+++ b/include/biscuit/label.hpp
@@ -0,0 +1,173 @@
+#pragma once
+
+#include <biscuit/assert.hpp>
+#include <cstddef>
+#include <optional>
+#include <set>
+
+namespace biscuit {
+
+/**
+ * A label is a representation of an address that can be used with branch and jump instructions.
+ *
+ * Labels do not need to be bound to a location immediately. A label can be created
+ * to provide branches with a tentative, undecided location that is then bound
+ * at a later point in time.
+ *
+ * @note Any label that is created, is used with a branch instruction,
+ *       but is *not* bound to a location (via Bind() in the assembler)
+ *       will result in an assertion being invoked when the label instance's
+ *       destructor is executed.
+ *
+ * @note A label may only be bound to one location. Any attempt to rebind
+ *       a label that is already bound will result in an assertion being
+ *       invoked.
+ *
+ * @par
+ * An example of binding a label:
+ *
+ * @code{.cpp}
+ * Assembler as{...};
+ * Label label;
+ *
+ * as.BNE(x2, x3, &label); // Use the label
+ * as.ADD(x7, x8, x9);
+ * as.XOR(x7, x10, x12);
+ * as.Bind(&label);        // Bind the label to a location
+ * @endcode
+ */
+class Label {
+public:
+    using Location = std::optional<ptrdiff_t>;
+    using LocationOffset = Location::value_type;
+
+    /**
+     * Default constructor.
+     *
+     * This constructor results in a label being constructed that is not
+     * bound to a particular location yet.
+     */
+    explicit Label() = default;
+
+    /// Destructor
+    ~Label() noexcept {
+        // It's a logic bug if something references a label and hasn't been handled.
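+        // For example, a Label that has been used with a branch emitter such
+        // as BNE() but goes out of scope without ever being passed to Bind()
+        // will trip this assertion.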
+        //
+        // This is usually indicative of a scenario where a label is referenced but
+        // hasn't been bound to a location.
+        //
+        BISCUIT_ASSERT(IsResolved());
+    }
+
+    // We disable copying of labels, as this doesn't really make sense to do.
+    // It also presents a problem. When labels are being resolved, if we have
+    // two labels pointing to the same place, resolving the links to this address
+    // would clobber the same locations N times over, once for each copy of the
+    // label.
+    //
+    // This isn't a particularly major problem, since resolving still produces
+    // the same end result, but it does make it annoying to think about label
+    // interactions moving forward. Thus, I choose to simply not think about it at all!
+    //
+    Label(const Label&) = delete;
+    Label& operator=(const Label&) = delete;
+
+    // Moving labels, on the other hand, is totally fine; this just pushes the data
+    // to another label while invalidating the label whose data was "stolen".
+    Label(Label&&) noexcept = default;
+    Label& operator=(Label&&) noexcept = default;
+
+    /**
+     * Determines whether or not this label instance has a location assigned to it.
+     *
+     * A label is considered bound if it has an assigned location.
+     */
+    [[nodiscard]] bool IsBound() const noexcept {
+        return m_location.has_value();
+    }
+
+    /**
+     * Determines whether or not this label is resolved.
+     *
+     * A label is considered resolved when all referencing offsets have been handled.
+     */
+    [[nodiscard]] bool IsResolved() const noexcept {
+        return m_offsets.empty();
+    }
+
+    /**
+     * Determines whether or not this label is unresolved.
+     *
+     * A label is considered unresolved if it still has any unhandled referencing offsets.
+     */
+    [[nodiscard]] bool IsUnresolved() const noexcept {
+        return !IsResolved();
+    }
+
+    /**
+     * Retrieves the location for this label.
+     *
+     * @note If the returned location is empty, then this label has not been assigned
+     *       a location yet.
+     */
+    [[nodiscard]] Location GetLocation() const noexcept {
+        return m_location;
+    }
+
+private:
+    // A label instance is inherently bound to the assembler it's
+    // used with, as the offsets within the label set depend on
+    // said assembler's code buffer.
+    friend class Assembler;
+
+    /**
+     * Binds a label to the given location.
+     *
+     * @param offset The instruction offset to bind this label to.
+     *
+     * @pre The label must not have already been bound to a previous location.
+     *      Attempting to rebind a label is, in almost all scenarios, the
+     *      source of bugs.
+     *      Attempting to rebind an already bound label will result in an assertion
+     *      being triggered.
+     */
+    void Bind(LocationOffset offset) noexcept {
+        BISCUIT_ASSERT(!IsBound());
+        m_location = offset;
+    }
+
+    /**
+     * Marks the given address as dependent on this label.
+     *
+     * This is used in scenarios where a label exists, but has not been
+     * bound to a location yet. It's important to track these addresses,
+     * as we'll need to patch the dependent branch instructions with the
+     * proper offset once the label is finally bound by the assembler.
+     *
+     * During label binding, the offset will be calculated and inserted
+     * into dependent instructions.
+     */
+    void AddOffset(LocationOffset offset) {
+        // If a label is already bound to a location, then offset tracking
+        // isn't necessary. Tripping this assert means we have a bug somewhere.
+        BISCUIT_ASSERT(!IsBound());
+        BISCUIT_ASSERT(IsNewOffset(offset));
+
+        m_offsets.insert(offset);
+    }
+
+    // Clears all the underlying offsets for this label.
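+    // (Invoked by the assembler once a label is bound and all pending offsets
+    // have been patched, so a bound label always reports IsResolved().)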
+    void ClearOffsets() noexcept {
+        m_offsets.clear();
+    }
+
+    // Determines whether or not this address has already been added before.
+    [[nodiscard]] bool IsNewOffset(LocationOffset offset) const noexcept {
+        return m_offsets.find(offset) == m_offsets.cend();
+    }
+
+    std::set<LocationOffset> m_offsets;
+    Location m_location;
+};
+
+} // namespace biscuit
diff --git a/include/biscuit/registers.hpp b/include/biscuit/registers.hpp
new file mode 100644
index 00000000..08c47787
--- /dev/null
+++ b/include/biscuit/registers.hpp
@@ -0,0 +1,315 @@
+#pragma once
+
+#include <cstdint>
+
+#include <biscuit/assert.hpp>
+#include <compare>
+
+namespace biscuit {
+
+/**
+ * Generic abstraction around a register.
+ *
+ * This is less bug-prone than using raw primitive types
+ * in opcode emitter functions, since it provides stronger typing.
+ */
+class Register {
+public:
+    constexpr Register() noexcept = default;
+
+    /// Gets the index for this register.
+    [[nodiscard]] constexpr uint32_t Index() const noexcept {
+        return m_index;
+    }
+
+    friend constexpr bool operator==(Register, Register) = default;
+    friend constexpr auto operator<=>(Register, Register) = default;
+
+protected:
+    constexpr explicit Register(uint32_t index) noexcept
+        : m_index{index} {}
+
+private:
+    uint32_t m_index{};
+};
+
+/// General purpose register.
+class GPR final : public Register {
+public:
+    constexpr GPR() noexcept : Register{0} {}
+    constexpr explicit GPR(uint32_t index) noexcept : Register{index} {}
+
+    friend constexpr bool operator==(GPR, GPR) = default;
+    friend constexpr auto operator<=>(GPR, GPR) = default;
+};
+
+/// Floating point register.
+class FPR final : public Register {
+public:
+    constexpr FPR() noexcept : Register{0} {}
+    constexpr explicit FPR(uint32_t index) noexcept : Register{index} {}
+
+    friend constexpr bool operator==(FPR, FPR) = default;
+    friend constexpr auto operator<=>(FPR, FPR) = default;
+};
+
+/// Vector register.
+class Vec final : public Register { +public: + constexpr Vec() noexcept : Register{0} {} + constexpr explicit Vec(uint32_t index) noexcept : Register{index} {} + + friend constexpr bool operator==(Vec, Vec) = default; + friend constexpr auto operator<=>(Vec, Vec) = default; +}; + +// General-purpose Registers + +constexpr GPR x0{0}; +constexpr GPR x1{1}; +constexpr GPR x2{2}; +constexpr GPR x3{3}; +constexpr GPR x4{4}; +constexpr GPR x5{5}; +constexpr GPR x6{6}; +constexpr GPR x7{7}; +constexpr GPR x8{8}; +constexpr GPR x9{9}; +constexpr GPR x10{10}; +constexpr GPR x11{11}; +constexpr GPR x12{12}; +constexpr GPR x13{13}; +constexpr GPR x14{14}; +constexpr GPR x15{15}; +constexpr GPR x16{16}; +constexpr GPR x17{17}; +constexpr GPR x18{18}; +constexpr GPR x19{19}; +constexpr GPR x20{20}; +constexpr GPR x21{21}; +constexpr GPR x22{22}; +constexpr GPR x23{23}; +constexpr GPR x24{24}; +constexpr GPR x25{25}; +constexpr GPR x26{26}; +constexpr GPR x27{27}; +constexpr GPR x28{28}; +constexpr GPR x29{29}; +constexpr GPR x30{30}; +constexpr GPR x31{31}; + +// Symbolic General-purpose Register Names + +constexpr GPR zero{x0}; + +constexpr GPR ra{x1}; +constexpr GPR sp{x2}; +constexpr GPR gp{x3}; +constexpr GPR tp{x4}; +constexpr GPR fp{x8}; + +constexpr GPR a0{x10}; +constexpr GPR a1{x11}; +constexpr GPR a2{x12}; +constexpr GPR a3{x13}; +constexpr GPR a4{x14}; +constexpr GPR a5{x15}; +constexpr GPR a6{x16}; +constexpr GPR a7{x17}; + +constexpr GPR s0{x8}; +constexpr GPR s1{x9}; +constexpr GPR s2{x18}; +constexpr GPR s3{x19}; +constexpr GPR s4{x20}; +constexpr GPR s5{x21}; +constexpr GPR s6{x22}; +constexpr GPR s7{x23}; +constexpr GPR s8{x24}; +constexpr GPR s9{x25}; +constexpr GPR s10{x26}; +constexpr GPR s11{x27}; + +constexpr GPR t0{x5}; +constexpr GPR t1{x6}; +constexpr GPR t2{x7}; +constexpr GPR t3{x28}; +constexpr GPR t4{x29}; +constexpr GPR t5{x30}; +constexpr GPR t6{x31}; + +// Floating-point registers + +constexpr FPR f0{0}; +constexpr FPR f1{1}; +constexpr FPR f2{2}; +constexpr FPR f3{3}; +constexpr FPR f4{4}; +constexpr FPR f5{5}; +constexpr FPR f6{6}; +constexpr FPR f7{7}; +constexpr FPR f8{8}; +constexpr FPR f9{9}; +constexpr FPR f10{10}; +constexpr FPR f11{11}; +constexpr FPR f12{12}; +constexpr FPR f13{13}; +constexpr FPR f14{14}; +constexpr FPR f15{15}; +constexpr FPR f16{16}; +constexpr FPR f17{17}; +constexpr FPR f18{18}; +constexpr FPR f19{19}; +constexpr FPR f20{20}; +constexpr FPR f21{21}; +constexpr FPR f22{22}; +constexpr FPR f23{23}; +constexpr FPR f24{24}; +constexpr FPR f25{25}; +constexpr FPR f26{26}; +constexpr FPR f27{27}; +constexpr FPR f28{28}; +constexpr FPR f29{29}; +constexpr FPR f30{30}; +constexpr FPR f31{31}; + +// Symbolic Floating-point Register Names + +constexpr FPR fa0{f10}; +constexpr FPR fa1{f11}; +constexpr FPR fa2{f12}; +constexpr FPR fa3{f13}; +constexpr FPR fa4{f14}; +constexpr FPR fa5{f15}; +constexpr FPR fa6{f16}; +constexpr FPR fa7{f17}; + +constexpr FPR ft0{f0}; +constexpr FPR ft1{f1}; +constexpr FPR ft2{f2}; +constexpr FPR ft3{f3}; +constexpr FPR ft4{f4}; +constexpr FPR ft5{f5}; +constexpr FPR ft6{f6}; +constexpr FPR ft7{f7}; +constexpr FPR ft8{f28}; +constexpr FPR ft9{f29}; +constexpr FPR ft10{f30}; +constexpr FPR ft11{f31}; + +constexpr FPR fs0{f8}; +constexpr FPR fs1{f9}; +constexpr FPR fs2{f18}; +constexpr FPR fs3{f19}; +constexpr FPR fs4{f20}; +constexpr FPR fs5{f21}; +constexpr FPR fs6{f22}; +constexpr FPR fs7{f23}; +constexpr FPR fs8{f24}; +constexpr FPR fs9{f25}; +constexpr FPR fs10{f26}; +constexpr FPR fs11{f27}; + +// Vector registers (V 
extension)
+
+constexpr Vec v0{0};
+constexpr Vec v1{1};
+constexpr Vec v2{2};
+constexpr Vec v3{3};
+constexpr Vec v4{4};
+constexpr Vec v5{5};
+constexpr Vec v6{6};
+constexpr Vec v7{7};
+constexpr Vec v8{8};
+constexpr Vec v9{9};
+constexpr Vec v10{10};
+constexpr Vec v11{11};
+constexpr Vec v12{12};
+constexpr Vec v13{13};
+constexpr Vec v14{14};
+constexpr Vec v15{15};
+constexpr Vec v16{16};
+constexpr Vec v17{17};
+constexpr Vec v18{18};
+constexpr Vec v19{19};
+constexpr Vec v20{20};
+constexpr Vec v21{21};
+constexpr Vec v22{22};
+constexpr Vec v23{23};
+constexpr Vec v24{24};
+constexpr Vec v25{25};
+constexpr Vec v26{26};
+constexpr Vec v27{27};
+constexpr Vec v28{28};
+constexpr Vec v29{29};
+constexpr Vec v30{30};
+constexpr Vec v31{31};
+
+// Register utilities
+
+// Used with compressed stack management instructions
+// (cm.push, cm.pop, etc.) for building up a register list to encode.
+//
+// Also enforces that only valid registers are used in the lists.
+class PushPopList final {
+public:
+    // Represents an inclusive range ([start, end]) of registers.
+    struct Range final {
+        // Signifies an empty range. Normally this doesn't need to explicitly
+        // be created. Default parameters will usually take care of it.
+        constexpr Range() : start{UINT32_MAX}, end{UINT32_MAX} {}
+
+        // This particular constructor is used for the case of rlist=5
+        // where only ra and s0 get stored.
+        constexpr Range(GPR start_end) noexcept : start{start_end}, end{start_end} {
+            BISCUIT_ASSERT(start_end == s0);
+        }
+
+        constexpr Range(GPR start_, GPR end_) noexcept : start{start_}, end{end_} {
+            BISCUIT_ASSERT(start_ == s0);
+            BISCUIT_ASSERT(IsSRegister(end_));
+
+            // See the Zc spec. The only way for s10 to be used is to also include s11.
+            BISCUIT_ASSERT(end_ != s10);
+        }
+
+        GPR start;
+        GPR end;
+    };
+
+    // Deliberately non-explicit to allow for convenient instantiation at usage sites.
+    // e.g. Rather than CM.POP(PushPopList{ra, {s0, s2}}, 16), we can just have the
+    //      usage be transparent like CM.POP({ra, {s0, s2}}, 16). Nice and compact!
+    constexpr PushPopList(GPR ra_reg, const Range& range = {}) noexcept
+        : m_bitmask{BuildBitmask(range)} {
+        BISCUIT_ASSERT(ra_reg == ra);
+    }
+
+    // Gets the built-up bitmask of passed in registers
+    [[nodiscard]] constexpr uint32_t GetBitmask() const noexcept {
+        return m_bitmask;
+    }
+
+private:
+    [[nodiscard]] static constexpr uint32_t BuildBitmask(const Range& range) noexcept {
+        if (range.end.Index() == UINT32_MAX) {
+            return 4U;
+        }
+        if (range.end == s11) {
+            return 15U;
+        }
+        if (range.end == s0 || range.end == s1) {
+            return range.end.Index() - 3U;
+        }
+        return range.end.Index() - 11U;
+    }
+
+    // Aside from ra, it's only valid for s0-s11 to show up in the register list ranges.
+    [[nodiscard]] static constexpr bool IsSRegister(const GPR gpr) noexcept {
+        return gpr == s0 || gpr == s1 || (gpr >= s2 && gpr <= s11);
+    }
+
+    uint32_t m_bitmask = 0;
+};
+
+} // namespace biscuit
diff --git a/include/biscuit/vector.hpp b/include/biscuit/vector.hpp
new file mode 100644
index 00000000..d31208ed
--- /dev/null
+++ b/include/biscuit/vector.hpp
@@ -0,0 +1,88 @@
+#pragma once
+
+#include <cstdint>
+
+// Source file for anything specific to the RISC-V vector extension.
+
+namespace biscuit {
+
+/// Describes whether or not an instruction should make use of the mask vector.
+enum class VecMask : uint32_t {
+    Yes = 0,
+    No = 1,
+};
+
+/// Describes the selected element width.
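+///
+/// Most vector instructions take the element width from vtype state rather than
+/// encoding it directly, so SEW is selected via the vsetvli/vsetivli/vsetvl
+/// emitters. A minimal sketch, assuming an Assembler `as` and this library's
+/// VSETVLI emitter with the tail/mask policies defaulted:
+///
+///     as.VSETVLI(x0, a0, SEW::E32, LMUL::M1); // vl = min(a0, VLMAX), 32-bit elements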
+enum class SEW : uint32_t { + E8 = 0b000, // 8-bit vector elements + E16 = 0b001, // 16-bit vector elements + E32 = 0b010, // 32-bit vector elements + E64 = 0b011, // 64-bit vector elements + E128 = 0b100, // 128-bit vector elements + E256 = 0b101, // 256-bit vector elements + E512 = 0b110, // 512-bit vector elements + E1024 = 0b111, // 1024-bit vector elements +}; + +/// Describes the selected register group multiplier. +enum class LMUL : uint32_t { + M1 = 0b000, // Group of one vector + M2 = 0b001, // Groups of two vectors + M4 = 0b010, // Groups of four vectors + M8 = 0b011, // Groups of eight vectors + MF8 = 0b101, // Fractional vector group (1/8) + MF4 = 0b110, // Fractional vector group (1/4) + MF2 = 0b111, // Fractional vector group (1/2) +}; + +/** + * Describes whether or not vector masks are agnostic. + * + * From the RVV spec: + * + * When a set is marked undisturbed, the corresponding set of + * destination elements in a vector register group retain the + * value they previously held. + * + * When a set is marked agnostic, the corresponding set of destination + * elements in any vector destination operand can either retain the value + * they previously held, or are overwritten with 1s. + * + * Within a single vector instruction, each destination element can be either + * left undisturbed or overwritten with 1s, in any combination, and the pattern + * of undisturbed or overwritten with 1s is not required to be deterministic when + * the instruction is executed with the same inputs. In addition, except for + * mask load instructions, any element in the tail of a mask result can also be + * written with the value the mask-producing operation would have calculated with vl=VLMAX + */ +enum class VMA : uint32_t { + No, // Undisturbed + Yes, // Agnostic +}; + +/** + * Describes whether or not vector tail elements are agnostic. + * + * From the RVV spec: + * + * When a set is marked undisturbed, the corresponding set of + * destination elements in a vector register group retain the + * value they previously held. + * + * When a set is marked agnostic, the corresponding set of destination + * elements in any vector destination operand can either retain the value + * they previously held, or are overwritten with 1s. + * + * Within a single vector instruction, each destination element can be either + * left undisturbed or overwritten with 1s, in any combination, and the pattern + * of undisturbed or overwritten with 1s is not required to be deterministic when + * the instruction is executed with the same inputs. 
In addition, except for
+ * mask load instructions, any element in the tail of a mask result can also be
+ * written with the value the mask-producing operation would have calculated with vl=VLMAX.
+ */
+enum class VTA : uint32_t {
+    No,  // Undisturbed
+    Yes, // Agnostic
+};
+
+} // namespace biscuit
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 00000000..3d841023
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,156 @@
+# Main library
+
+add_library(biscuit
+    # Source files
+    assembler.cpp
+    assembler_compressed.cpp
+    assembler_crypto.cpp
+    assembler_floating_point.cpp
+    assembler_vector.cpp
+    code_buffer.cpp
+    cpuinfo.cpp
+
+    # Headers
+    assembler_util.hpp
+    "${PROJECT_SOURCE_DIR}/include/biscuit/assembler.hpp"
+    "${PROJECT_SOURCE_DIR}/include/biscuit/assert.hpp"
+    "${PROJECT_SOURCE_DIR}/include/biscuit/code_buffer.hpp"
+    "${PROJECT_SOURCE_DIR}/include/biscuit/csr.hpp"
+    "${PROJECT_SOURCE_DIR}/include/biscuit/isa.hpp"
+    "${PROJECT_SOURCE_DIR}/include/biscuit/label.hpp"
+    "${PROJECT_SOURCE_DIR}/include/biscuit/registers.hpp"
+    "${PROJECT_SOURCE_DIR}/include/biscuit/vector.hpp"
+    "${PROJECT_SOURCE_DIR}/include/biscuit/cpuinfo.hpp"
+)
+add_library(biscuit::biscuit ALIAS biscuit)
+
+target_include_directories(biscuit
+PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:include>
+
+PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+target_compile_features(biscuit
+PRIVATE
+    cxx_std_20
+)
+
+if (MSVC)
+    target_compile_options(biscuit
+    PRIVATE
+        /MP
+        /Zi
+        /Zo
+        /permissive-
+        /EHsc
+        /utf-8
+        /volatile:iso
+        /Zc:externConstexpr
+        /Zc:inline
+        /Zc:throwingNew
+
+        # Warnings
+        /W4
+        /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
+        /we4101 # 'identifier': unreferenced local variable
+        /we4265 # 'class': class has virtual functions, but destructor is not virtual
+        /we4287 # 'operator' : unsigned/negative constant mismatch
+        /we4365 # 'action' : conversion from 'type_1' to 'type_2', signed/unsigned mismatch
+        /we4388 # signed/unsigned mismatch
+        /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
+        /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
+        /we4555 # Expression has no effect; expected expression with side-effect
+        /we4715 # 'function': not all control paths return a value
+        /we4834 # Discarding return value of function with 'nodiscard' attribute
+        /we5038 # data member 'member1' will be initialized after data member 'member2'
+    )
+elseif (("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") OR ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU"))
+    target_compile_options(biscuit
+    PRIVATE
+        -Wall
+        -Wextra
+        -Wconversion
+        -Wsign-conversion
+
+        -Werror=array-bounds
+        -Werror=cast-qual
+        -Werror=ignored-qualifiers
+        -Werror=implicit-fallthrough
+        -Werror=sign-compare
+        -Werror=reorder
+        -Werror=uninitialized
+        -Werror=unused-function
+        -Werror=unused-result
+        -Werror=unused-variable
+    )
+endif()
+
+if (BISCUIT_CODE_BUFFER_MMAP)
+    target_compile_definitions(biscuit
+    PRIVATE
+        -DBISCUIT_CODE_BUFFER_MMAP
+    )
+endif()
+
+# Install target
+
+include(GNUInstallDirs)
+set(BISCUIT_INSTALL_CONFIGDIR "${CMAKE_INSTALL_LIBDIR}/cmake/biscuit")
+
+# Set install target and relevant includes.
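+# The install/export rules below are what make the library consumable through
+# find_package(). A minimal consumer sketch (the project and target names here
+# are illustrative, not part of this repo):
+#
+#   find_package(biscuit REQUIRED)
+#   add_executable(my_jit main.cpp)
+#   target_link_libraries(my_jit PRIVATE biscuit::biscuit)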
+install(TARGETS biscuit
+    EXPORT biscuit-targets
+    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+)
+install(
+    DIRECTORY "${PROJECT_SOURCE_DIR}/include/"
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+)
+
+# Export targets to a script
+install(EXPORT biscuit-targets
+    FILE
+        biscuit-targets.cmake
+    NAMESPACE
+        biscuit::
+    DESTINATION
+        "${BISCUIT_INSTALL_CONFIGDIR}"
+)
+
+# Now create the config version script
+include(CMakePackageConfigHelpers)
+write_basic_package_version_file(
+    "${CMAKE_CURRENT_BINARY_DIR}/biscuit-config-version.cmake"
+    VERSION
+        ${PROJECT_VERSION}
+    COMPATIBILITY
+        SameMajorVersion
+)
+
+configure_package_config_file(
+    "${PROJECT_SOURCE_DIR}/cmake/biscuit-config.cmake.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/biscuit-config.cmake"
+
+    INSTALL_DESTINATION "${BISCUIT_INSTALL_CONFIGDIR}"
+)
+
+# Now install the config and version files.
+install(FILES
+    "${CMAKE_CURRENT_BINARY_DIR}/biscuit-config.cmake"
+    "${CMAKE_CURRENT_BINARY_DIR}/biscuit-config-version.cmake"
+
+    DESTINATION "${BISCUIT_INSTALL_CONFIGDIR}"
+)
+
+# Export library from the build tree.
+export(EXPORT biscuit-targets
+    FILE
+        "${CMAKE_CURRENT_BINARY_DIR}/biscuit-targets.cmake"
+    NAMESPACE
+        biscuit::
+)
+export(PACKAGE biscuit)
diff --git a/src/assembler.cpp b/src/assembler.cpp
new file mode 100644
index 00000000..a44da23d
--- /dev/null
+++ b/src/assembler.cpp
@@ -0,0 +1,1401 @@
+#include <biscuit/assembler.hpp>
+#include <biscuit/assert.hpp>
+
+#include <bit>
+#include <cstddef>
+#include <utility>
+
+#include "assembler_util.hpp"
+
+namespace biscuit {
+
+Assembler::Assembler(size_t capacity)
+    : m_buffer(capacity) {}
+
+Assembler::Assembler(uint8_t* buffer, size_t capacity, ArchFeature features)
+    : m_buffer(buffer, capacity), m_features{features} {}
+
+Assembler::~Assembler() = default;
+
+CodeBuffer& Assembler::GetCodeBuffer() {
+    return m_buffer;
+}
+
+CodeBuffer Assembler::SwapCodeBuffer(CodeBuffer&& buffer) noexcept {
+    return std::exchange(m_buffer, std::move(buffer));
+}
+
+void Assembler::Bind(Label* label) {
+    BindToOffset(label, m_buffer.GetCursorOffset());
+}
+
+void Assembler::ADD(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b000, rd, 0b0110011);
+}
+
+void Assembler::ADDI(GPR rd, GPR rs, int32_t imm) noexcept {
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b000, rd, 0b0010011);
+}
+
+void Assembler::AND(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b111, rd, 0b0110011);
+}
+
+void Assembler::ANDI(GPR rd, GPR rs, uint32_t imm) noexcept {
+    EmitIType(m_buffer, imm, rs, 0b111, rd, 0b0010011);
+}
+
+void Assembler::AUIPC(GPR rd, int32_t imm) noexcept {
+    EmitUType(m_buffer, static_cast<uint32_t>(imm), rd, 0b0010111);
+}
+
+void Assembler::BEQ(GPR rs1, GPR rs2, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BEQ(rs1, rs2, static_cast<int32_t>(address));
+}
+
+void Assembler::BEQZ(GPR rs, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BEQZ(rs, static_cast<int32_t>(address));
+}
+
+void Assembler::BGE(GPR rs1, GPR rs2, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BGE(rs1, rs2, static_cast<int32_t>(address));
+}
+
+void Assembler::BGEU(GPR rs1, GPR rs2, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BGEU(rs1, rs2, static_cast<int32_t>(address));
+}
+
+void Assembler::BGEZ(GPR rs, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BGEZ(rs, static_cast<int32_t>(address));
+}
+
+void Assembler::BGT(GPR rs, GPR rt, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BGT(rs, rt, static_cast<int32_t>(address));
+}
+
+void Assembler::BGTU(GPR rs, GPR rt, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BGTU(rs, rt, static_cast<int32_t>(address));
+}
+
+void Assembler::BGTZ(GPR rs, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BGTZ(rs, static_cast<int32_t>(address));
+}
+
+void Assembler::BLE(GPR rs, GPR rt, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BLE(rs, rt, static_cast<int32_t>(address));
+}
+
+void Assembler::BLEU(GPR rs, GPR rt, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BLEU(rs, rt, static_cast<int32_t>(address));
+}
+
+void Assembler::BLEZ(GPR rs, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BLEZ(rs, static_cast<int32_t>(address));
+}
+
+void Assembler::BLT(GPR rs1, GPR rs2, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BLT(rs1, rs2, static_cast<int32_t>(address));
+}
+
+void Assembler::BLTU(GPR rs1, GPR rs2, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BLTU(rs1, rs2, static_cast<int32_t>(address));
+}
+
+void Assembler::BLTZ(GPR rs, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BLTZ(rs, static_cast<int32_t>(address));
+}
+
+void Assembler::BNE(GPR rs1, GPR rs2, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BNE(rs1, rs2, static_cast<int32_t>(address));
+}
+
+void Assembler::BNEZ(GPR rs, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BNEZ(rs, static_cast<int32_t>(address));
+}
+
+void Assembler::BEQ(GPR rs1, GPR rs2, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidBTypeImm(imm));
+    EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b000, 0b1100011);
+}
+
+void Assembler::BEQZ(GPR rs, int32_t imm) noexcept {
+    BEQ(rs, x0, imm);
+}
+
+void Assembler::BGE(GPR rs1, GPR rs2, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidBTypeImm(imm));
+    EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b101, 0b1100011);
+}
+
+void Assembler::BGEU(GPR rs1, GPR rs2, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidBTypeImm(imm));
+    EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b111, 0b1100011);
+}
+
+void Assembler::BGEZ(GPR rs, int32_t imm) noexcept {
+    BGE(rs, x0, imm);
+}
+
+void Assembler::BGT(GPR rs, GPR rt, int32_t imm) noexcept {
+    BLT(rt, rs, imm);
+}
+
+void Assembler::BGTU(GPR rs, GPR rt, int32_t imm) noexcept {
+    BLTU(rt, rs, imm);
+}
+
+void Assembler::BGTZ(GPR rs, int32_t imm) noexcept {
+    BLT(x0, rs, imm);
+}
+
+void Assembler::BLE(GPR rs, GPR rt, int32_t imm) noexcept {
+    BGE(rt, rs, imm);
+}
+
+void Assembler::BLEU(GPR rs, GPR rt, int32_t imm) noexcept {
+    BGEU(rt, rs, imm);
+}
+
+void Assembler::BLEZ(GPR rs, int32_t imm) noexcept {
+    BGE(x0, rs, imm);
+}
+
+void Assembler::BLT(GPR rs1, GPR rs2, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidBTypeImm(imm));
+    EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b100, 0b1100011);
+}
+
+void Assembler::BLTU(GPR rs1, GPR rs2, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidBTypeImm(imm));
+    EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b110, 0b1100011);
+}
+
+void Assembler::BLTZ(GPR rs, int32_t imm) noexcept {
+    BLT(rs, x0, imm);
+}
+
+void Assembler::BNE(GPR rs1, GPR rs2, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidBTypeImm(imm));
+    EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b001, 0b1100011);
+}
+
+void Assembler::BNEZ(GPR rs, int32_t imm) noexcept {
+    BNE(x0, rs, imm);
+}
+
+void Assembler::CALL(int32_t offset) noexcept {
+    const auto uimm = static_cast<uint32_t>(offset);
+    const auto lower = uimm & 0xFFF;
+    const auto upper = (uimm & 0xFFFFF000) >> 12;
+    const auto needs_increment = (uimm & 0x800) != 0;
+
+    // Sign-extend the lower portion if the MSB of it is set.
+    const auto new_lower = needs_increment ? static_cast<int32_t>(lower << 20) >> 20
+                                           : static_cast<int32_t>(lower);
+    const auto new_upper = needs_increment ? upper + 1 : upper;
+
+    AUIPC(x1, static_cast<int32_t>(new_upper));
+    JALR(x1, new_lower, x1);
+}
+
+void Assembler::EBREAK() noexcept {
+    m_buffer.Emit32(0x00100073);
+}
+
+void Assembler::ECALL() noexcept {
+    m_buffer.Emit32(0x00000073);
+}
+
+void Assembler::FENCE() noexcept {
+    FENCE(FenceOrder::IORW, FenceOrder::IORW);
+}
+
+void Assembler::FENCE(FenceOrder pred, FenceOrder succ) noexcept {
+    EmitFENCE(m_buffer, 0b0000, pred, succ, x0, 0b000, x0, 0b0001111);
+}
+
+void Assembler::FENCEI(GPR rd, GPR rs, uint32_t imm) noexcept {
+    m_buffer.Emit32(((imm & 0xFFF) << 20) | (rs.Index() << 15) | 0x1000U | (rd.Index() << 7) | 0b0001111);
+}
+
+void Assembler::FENCETSO() noexcept {
+    EmitFENCE(m_buffer, 0b1000, FenceOrder::RW, FenceOrder::RW, x0, 0b000, x0, 0b0001111);
+}
+
+void Assembler::J(Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BISCUIT_ASSERT(IsValidJTypeImm(address));
+    J(static_cast<int32_t>(address));
+}
+
+void Assembler::JAL(Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BISCUIT_ASSERT(IsValidJTypeImm(address));
+    JAL(static_cast<int32_t>(address));
+}
+
+void Assembler::JAL(GPR rd, Label* label) noexcept {
+    const auto address = LinkAndGetOffset(label);
+    BISCUIT_ASSERT(IsValidJTypeImm(address));
+    JAL(rd, static_cast<int32_t>(address));
+}
+
+void Assembler::J(int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidJTypeImm(imm));
+    JAL(x0, imm);
+}
+
+void Assembler::JAL(int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidJTypeImm(imm));
+    EmitJType(m_buffer, static_cast<uint32_t>(imm), x1, 0b1101111);
+}
+
+void Assembler::JAL(GPR rd, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidJTypeImm(imm));
+    EmitJType(m_buffer, static_cast<uint32_t>(imm), rd, 0b1101111);
+}
+
+void Assembler::JALR(GPR rs) noexcept {
+    JALR(x1, 0, rs);
+}
+
+void Assembler::JALR(GPR rd, int32_t imm, GPR rs1) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs1, 0b000, rd, 0b1100111);
+}
+
+void Assembler::JR(GPR rs) noexcept {
+    JALR(x0, 0, rs);
+}
+
+void Assembler::LB(GPR rd, int32_t imm, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b000, rd, 0b0000011);
+}
+
+void Assembler::LBU(GPR rd, int32_t imm, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b100, rd, 0b0000011);
+}
+
+void Assembler::LH(GPR rd, int32_t imm, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b001, rd, 0b0000011);
+}
+
+void Assembler::LHU(GPR rd, int32_t imm, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b101, rd, 0b0000011);
+}
+
+void Assembler::LI(GPR rd, uint64_t imm) noexcept {
+    if (IsRV32(m_features)) {
+        // Depending on imm, the following instructions are emitted.
+        // hi20 == 0 -> ADDI
+        // lo12 == 0 && hi20 != 0 -> LUI
+        // otherwise -> LUI+ADDI
+
+        // Add 0x800 to cancel out the sign extension of ADDI.
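+        // e.g. imm = 0x00000FFF: lo12 (0xFFF) sign-extends to -1 in ADDI, so the
+        // +0x800 rounds hi20 up to 1; LUI(1) followed by ADDI(-1) then yields
+        // 0x00001000 - 1 == 0x00000FFF as intended.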
+        const auto uimm32 = static_cast<uint32_t>(imm);
+        const auto hi20 = (uimm32 + 0x800) >> 12 & 0xFFFFF;
+        const auto lo12 = static_cast<int32_t>(uimm32) & 0xFFF;
+        GPR rs1 = zero;
+
+        if (hi20 != 0) {
+            LUI(rd, hi20);
+            rs1 = rd;
+        }
+
+        if (lo12 != 0 || hi20 == 0) {
+            ADDI(rd, rs1, lo12);
+        }
+    } else {
+        // For 64-bit imm, a sequence of up to 8 instructions (i.e. LUI+ADDIW+SLLI+
+        // ADDI+SLLI+ADDI+SLLI+ADDI) is emitted.
+        // In the following, imm is processed from LSB to MSB while instruction emission
+        // is performed from MSB to LSB by calling LI() recursively. In each recursion,
+        // the lowest 12 bits are removed from imm and the optimal shift amount is
+        // calculated. Then, the remaining part of imm is processed recursively and
+        // LI() gets called as soon as it fits into 32 bits.
+
+        if (static_cast<uint64_t>(static_cast<int64_t>(imm << 32) >> 32) == imm) {
+            // Depending on imm, the following instructions are emitted.
+            // hi20 == 0 -> ADDIW
+            // lo12 == 0 && hi20 != 0 -> LUI
+            // otherwise -> LUI+ADDIW
+
+            // Add 0x800 to cancel out the sign extension of ADDIW.
+            const auto hi20 = (static_cast<uint32_t>(imm) + 0x800) >> 12 & 0xFFFFF;
+            const auto lo12 = static_cast<int32_t>(imm) & 0xFFF;
+            GPR rs1 = zero;
+
+            if (hi20 != 0) {
+                LUI(rd, hi20);
+                rs1 = rd;
+            }
+
+            if (lo12 != 0 || hi20 == 0) {
+                ADDIW(rd, rs1, lo12);
+            }
+            return;
+        }
+
+        const auto lo12 = static_cast<int32_t>(static_cast<int64_t>(imm << 52) >> 52);
+        // Add 0x800 to cancel out the sign extension of ADDI.
+        uint64_t hi52 = (imm + 0x800) >> 12;
+        const uint32_t shift = 12 + static_cast<uint32_t>(std::countr_zero(hi52));
+        hi52 = static_cast<uint64_t>((static_cast<int64_t>(hi52 >> (shift - 12)) << shift) >> shift);
+        LI(rd, hi52);
+        SLLI(rd, rd, shift);
+        if (lo12 != 0) {
+            ADDI(rd, rd, lo12);
+        }
+    }
+}
+
+void Assembler::LUI(GPR rd, uint32_t imm) noexcept {
+    EmitUType(m_buffer, imm, rd, 0b0110111);
+}
+
+void Assembler::LW(GPR rd, int32_t imm, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b010, rd, 0b0000011);
+}
+
+void Assembler::MV(GPR rd, GPR rs) noexcept {
+    ADDI(rd, rs, 0);
+}
+
+void Assembler::NEG(GPR rd, GPR rs) noexcept {
+    SUB(rd, x0, rs);
+}
+
+void Assembler::NOP() noexcept {
+    ADDI(x0, x0, 0);
+}
+
+void Assembler::NOT(GPR rd, GPR rs) noexcept {
+    XORI(rd, rs, UINT32_MAX);
+}
+
+void Assembler::OR(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b110, rd, 0b0110011);
+}
+
+void Assembler::ORI(GPR rd, GPR rs, uint32_t imm) noexcept {
+    EmitIType(m_buffer, imm, rs, 0b110, rd, 0b0010011);
+}
+
+void Assembler::PAUSE() noexcept {
+    m_buffer.Emit32(0x0100000F);
+}
+
+void Assembler::RET() noexcept {
+    JALR(x0, 0, x1);
+}
+
+void Assembler::SB(GPR rs2, int32_t imm, GPR rs1) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitSType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b000, 0b0100011);
+}
+
+void Assembler::SEQZ(GPR rd, GPR rs) noexcept {
+    SLTIU(rd, rs, 1);
+}
+
+void Assembler::SGTZ(GPR rd, GPR rs) noexcept {
+    SLT(rd, x0, rs);
+}
+
+void Assembler::SH(GPR rs2, int32_t imm, GPR rs1) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitSType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b001, 0b0100011);
+}
+
+void Assembler::SLL(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b001, rd, 0b0110011);
+}
+
+void Assembler::SLLI(GPR rd, GPR rs, uint32_t shift) noexcept {
+    if (IsRV32(m_features)) {
+        BISCUIT_ASSERT(shift <= 31);
+        EmitIType(m_buffer, shift & 0x1F, rs, 0b001, rd, 0b0010011);
+    } else {
+        BISCUIT_ASSERT(shift <= 63);
+        EmitIType(m_buffer, shift & 0x3F, rs, 0b001, rd, 0b0010011);
+    }
+}
+
+void Assembler::SLT(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b010, rd, 0b0110011);
+}
+
+void Assembler::SLTI(GPR rd, GPR rs, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b010, rd, 0b0010011);
+}
+
+void Assembler::SLTIU(GPR rd, GPR rs, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b011, rd, 0b0010011);
+}
+
+void Assembler::SLTU(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b011, rd, 0b0110011);
+}
+
+void Assembler::SLTZ(GPR rd, GPR rs) noexcept {
+    SLT(rd, rs, x0);
+}
+
+void Assembler::SNEZ(GPR rd, GPR rs) noexcept {
+    SLTU(rd, x0, rs);
+}
+
+void Assembler::SRA(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0100000, rhs, lhs, 0b101, rd, 0b0110011);
+}
+
+void Assembler::SRAI(GPR rd, GPR rs, uint32_t shift) noexcept {
+    if (IsRV32(m_features)) {
+        BISCUIT_ASSERT(shift <= 31);
+        EmitIType(m_buffer, (0b0100000 << 5) | (shift & 0x1F), rs, 0b101, rd, 0b0010011);
+    } else {
+        BISCUIT_ASSERT(shift <= 63);
+        EmitIType(m_buffer, (0b0100000 << 5) | (shift & 0x3F), rs, 0b101, rd, 0b0010011);
+    }
+}
+
+void Assembler::SRL(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b101, rd, 0b0110011);
+}
+
+void Assembler::SRLI(GPR rd, GPR rs, uint32_t shift) noexcept {
+    if (IsRV32(m_features)) {
+        BISCUIT_ASSERT(shift <= 31);
+        EmitIType(m_buffer, shift & 0x1F, rs, 0b101, rd, 0b0010011);
+    } else {
+        BISCUIT_ASSERT(shift <= 63);
+        EmitIType(m_buffer, shift & 0x3F, rs, 0b101, rd, 0b0010011);
+    }
+}
+
+void Assembler::SUB(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0100000, rhs, lhs, 0b000, rd, 0b0110011);
+}
+
+void Assembler::SW(GPR rs2, int32_t imm, GPR rs1) noexcept {
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitSType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b010, 0b0100011);
+}
+
+void Assembler::XOR(GPR rd, GPR lhs, GPR rhs) noexcept {
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b100, rd, 0b0110011);
+}
+
+void Assembler::XORI(GPR rd, GPR rs, uint32_t imm) noexcept {
+    EmitIType(m_buffer, imm, rs, 0b100, rd, 0b0010011);
+}
+
+// RV64I Instructions
+
+void Assembler::ADDIW(GPR rd, GPR rs, int32_t imm) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b000, rd, 0b0011011);
+}
+
+void Assembler::ADDW(GPR rd, GPR lhs, GPR rhs) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b000, rd, 0b0111011);
+}
+
+void Assembler::LD(GPR rd, int32_t imm, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b011, rd, 0b0000011);
+}
+
+void Assembler::LWU(GPR rd, int32_t imm, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b110, rd, 0b0000011);
+}
+
+void Assembler::SD(GPR rs2, int32_t imm, GPR rs1) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
+    EmitSType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b011, 0b0100011);
+}
+
+void Assembler::SLLIW(GPR rd, GPR rs, uint32_t shift) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    BISCUIT_ASSERT(shift <= 31);
+    EmitIType(m_buffer, shift & 0x1F, rs, 0b001, rd, 0b0011011);
+}
+void Assembler::SRAIW(GPR rd, GPR rs, uint32_t shift)
noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + BISCUIT_ASSERT(shift <= 31); + EmitIType(m_buffer, (0b0100000 << 5) | (shift & 0x1F), rs, 0b101, rd, 0b0011011); +} +void Assembler::SRLIW(GPR rd, GPR rs, uint32_t shift) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + BISCUIT_ASSERT(shift <= 31); + EmitIType(m_buffer, shift & 0x1F, rs, 0b101, rd, 0b0011011); +} + +void Assembler::SLLW(GPR rd, GPR lhs, GPR rhs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b001, rd, 0b0111011); +} +void Assembler::SRAW(GPR rd, GPR lhs, GPR rhs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0100000, rhs, lhs, 0b101, rd, 0b0111011); +} +void Assembler::SRLW(GPR rd, GPR lhs, GPR rhs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b101, rd, 0b0111011); +} + +void Assembler::SUBW(GPR rd, GPR lhs, GPR rhs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0100000, rhs, lhs, 0b000, rd, 0b0111011); +} + +// Zawrs Extension Instructions + +void Assembler::WRS_NTO() noexcept { + EmitIType(m_buffer, 0b01101, x0, 0, x0, 0b1110011); +} +void Assembler::WRS_STO() noexcept { + EmitIType(m_buffer, 0b11101, x0, 0, x0, 0b1110011); +} + +// Zacas Extension Instructions + +void Assembler::AMOCAS_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + if (IsRV32(m_features)) { + BISCUIT_ASSERT((rd.Index() % 2) == 0); + BISCUIT_ASSERT((rs1.Index() % 2) == 0); + BISCUIT_ASSERT((rs2.Index() % 2) == 0); + } + EmitAtomic(m_buffer, 0b00101, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOCAS_Q(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + + // Both rd and rs2 indicate a register pair, so they need to be even-numbered. 
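+    // Per the Zacas spec, the pair (rd, rd+1) holds the expected value and
+    // receives the loaded 128-bit result, while (rs2, rs2+1) supplies the
+    // swap value.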
+    BISCUIT_ASSERT((rd.Index() % 2) == 0);
+    BISCUIT_ASSERT((rs1.Index() % 2) == 0);
+    BISCUIT_ASSERT((rs2.Index() % 2) == 0);
+    EmitAtomic(m_buffer, 0b00101, ordering, rs2, rs1, 0b100, rd, 0b0101111);
+}
+void Assembler::AMOCAS_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
+    EmitAtomic(m_buffer, 0b00101, ordering, rs2, rs1, 0b010, rd, 0b0101111);
+}
+
+// Zicond Extension Instructions
+
+void Assembler::CZERO_EQZ(GPR rd, GPR value, GPR condition) noexcept {
+    EmitRType(m_buffer, 0b0000111, condition, value, 0b101, rd, 0b0110011);
+}
+void Assembler::CZERO_NEZ(GPR rd, GPR value, GPR condition) noexcept {
+    EmitRType(m_buffer, 0b0000111, condition, value, 0b111, rd, 0b0110011);
+}
+
+// Zicsr Extension Instructions
+
+void Assembler::CSRRC(GPR rd, CSR csr, GPR rs) noexcept {
+    EmitIType(m_buffer, static_cast<uint32_t>(csr), rs, 0b011, rd, 0b1110011);
+}
+void Assembler::CSRRCI(GPR rd, CSR csr, uint32_t imm) noexcept {
+    BISCUIT_ASSERT(imm <= 0x1F);
+    EmitIType(m_buffer, static_cast<uint32_t>(csr), GPR{imm & 0x1F}, 0b111, rd, 0b1110011);
+}
+void Assembler::CSRRS(GPR rd, CSR csr, GPR rs) noexcept {
+    EmitIType(m_buffer, static_cast<uint32_t>(csr), rs, 0b010, rd, 0b1110011);
+}
+void Assembler::CSRRSI(GPR rd, CSR csr, uint32_t imm) noexcept {
+    BISCUIT_ASSERT(imm <= 0x1F);
+    EmitIType(m_buffer, static_cast<uint32_t>(csr), GPR{imm & 0x1F}, 0b110, rd, 0b1110011);
+}
+void Assembler::CSRRW(GPR rd, CSR csr, GPR rs) noexcept {
+    EmitIType(m_buffer, static_cast<uint32_t>(csr), rs, 0b001, rd, 0b1110011);
+}
+void Assembler::CSRRWI(GPR rd, CSR csr, uint32_t imm) noexcept {
+    BISCUIT_ASSERT(imm <= 0x1F);
+    EmitIType(m_buffer, static_cast<uint32_t>(csr), GPR{imm & 0x1F}, 0b101, rd, 0b1110011);
+}
+
+void Assembler::CSRR(GPR rd, CSR csr) noexcept {
+    CSRRS(rd, csr, x0);
+}
+void Assembler::CSRW(CSR csr, GPR rs) noexcept {
+    CSRRW(x0, csr, rs);
+}
+
+void Assembler::CSRS(CSR csr, GPR rs) noexcept {
+    CSRRS(x0, csr, rs);
+}
+void Assembler::CSRC(CSR csr, GPR rs) noexcept {
+    CSRRC(x0, csr, rs);
+}
+
+void Assembler::CSRCI(CSR csr, uint32_t imm) noexcept {
+    CSRRCI(x0, csr, imm);
+}
+void Assembler::CSRSI(CSR csr, uint32_t imm) noexcept {
+    CSRRSI(x0, csr, imm);
+}
+void Assembler::CSRWI(CSR csr, uint32_t imm) noexcept {
+    CSRRWI(x0, csr, imm);
+}
+
+void Assembler::FRCSR(GPR rd) noexcept {
+    CSRRS(rd, CSR::FCSR, x0);
+}
+void Assembler::FSCSR(GPR rd, GPR rs) noexcept {
+    CSRRW(rd, CSR::FCSR, rs);
+}
+void Assembler::FSCSR(GPR rs) noexcept {
+    CSRRW(x0, CSR::FCSR, rs);
+}
+
+void Assembler::FRRM(GPR rd) noexcept {
+    CSRRS(rd, CSR::FRM, x0);
+}
+void Assembler::FSRM(GPR rd, GPR rs) noexcept {
+    CSRRW(rd, CSR::FRM, rs);
+}
+void Assembler::FSRM(GPR rs) noexcept {
+    CSRRW(x0, CSR::FRM, rs);
+}
+
+void Assembler::FSRMI(GPR rd, uint32_t imm) noexcept {
+    CSRRWI(rd, CSR::FRM, imm);
+}
+void Assembler::FSRMI(uint32_t imm) noexcept {
+    CSRRWI(x0, CSR::FRM, imm);
+}
+
+void Assembler::FRFLAGS(GPR rd) noexcept {
+    CSRRS(rd, CSR::FFlags, x0);
+}
+void Assembler::FSFLAGS(GPR rd, GPR rs) noexcept {
+    CSRRW(rd, CSR::FFlags, rs);
+}
+void Assembler::FSFLAGS(GPR rs) noexcept {
+    CSRRW(x0, CSR::FFlags, rs);
+}
+
+void Assembler::FSFLAGSI(GPR rd, uint32_t imm) noexcept {
+    CSRRWI(rd, CSR::FFlags, imm);
+}
+void Assembler::FSFLAGSI(uint32_t imm) noexcept {
+    CSRRWI(x0, CSR::FFlags, imm);
+}
+
+void Assembler::RDCYCLE(GPR rd) noexcept {
+    CSRRS(rd, CSR::Cycle, x0);
+}
+void Assembler::RDCYCLEH(GPR rd) noexcept {
+    CSRRS(rd, CSR::CycleH, x0);
+}
+
+void Assembler::RDINSTRET(GPR rd) noexcept {
+    CSRRS(rd, CSR::InstRet, x0);
+}
+void Assembler::RDINSTRETH(GPR rd) noexcept {
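+    // Upper 32 bits of the instructions-retired counter; meaningful on RV32,
+    // where the 64-bit counters are split across the *H CSRs.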
CSRRS(rd, CSR::InstRetH, x0); +} + +void Assembler::RDTIME(GPR rd) noexcept { + CSRRS(rd, CSR::Time, x0); +} +void Assembler::RDTIMEH(GPR rd) noexcept { + CSRRS(rd, CSR::TimeH, x0); +} + +// Zihintntl Extension Instructions + +void Assembler::C_NTL_ALL() noexcept { + C_ADD(x0, x5); +} +void Assembler::C_NTL_S1() noexcept { + C_ADD(x0, x4); +} +void Assembler::C_NTL_P1() noexcept { + C_ADD(x0, x2); +} +void Assembler::C_NTL_PALL() noexcept { + C_ADD(x0, x3); +} +void Assembler::NTL_ALL() noexcept { + ADD(x0, x0, x5); +} +void Assembler::NTL_S1() noexcept { + ADD(x0, x0, x4); +} +void Assembler::NTL_P1() noexcept { + ADD(x0, x0, x2); +} +void Assembler::NTL_PALL() noexcept { + ADD(x0, x0, x3); +} + +// RV32M Extension Instructions + +void Assembler::DIV(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b100, rd, 0b0110011); +} +void Assembler::DIVU(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b101, rd, 0b0110011); +} +void Assembler::MUL(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b000, rd, 0b0110011); +} +void Assembler::MULH(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b001, rd, 0b0110011); +} +void Assembler::MULHSU(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b010, rd, 0b0110011); +} +void Assembler::MULHU(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b011, rd, 0b0110011); +} +void Assembler::REM(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b110, rd, 0b0110011); +} +void Assembler::REMU(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b111, rd, 0b0110011); +} + +// RV64M Extension Instructions + +void Assembler::DIVW(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b100, rd, 0b0111011); +} +void Assembler::DIVUW(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b101, rd, 0b0111011); +} +void Assembler::MULW(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b000, rd, 0b0111011); +} +void Assembler::REMW(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b110, rd, 0b0111011); +} +void Assembler::REMUW(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b111, rd, 0b0111011); +} + +// RV32A Extension Instructions + +void Assembler::AMOADD_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b00000, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::AMOAND_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b01100, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::AMOMAX_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b10100, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::AMOMAXU_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b11100, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::AMOMIN_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b10000, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::AMOMINU_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b11000, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::AMOOR_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 
0b01000, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::AMOSWAP_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b00001, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::AMOXOR_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b00100, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} +void Assembler::LR_W(Ordering ordering, GPR rd, GPR rs) noexcept { + EmitAtomic(m_buffer, 0b00010, ordering, x0, rs, 0b010, rd, 0b0101111); +} +void Assembler::SC_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + EmitAtomic(m_buffer, 0b00011, ordering, rs2, rs1, 0b010, rd, 0b0101111); +} + +// RV64A Extension Instructions + +void Assembler::AMOADD_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b00000, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOAND_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b01100, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOMAX_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b10100, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOMAXU_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b11100, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOMIN_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b10000, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOMINU_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b11000, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOOR_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b01000, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOSWAP_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b00001, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::AMOXOR_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b00100, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} +void Assembler::LR_D(Ordering ordering, GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b00010, ordering, x0, rs, 0b011, rd, 0b0101111); +} +void Assembler::SC_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAtomic(m_buffer, 0b00011, ordering, rs2, rs1, 0b011, rd, 0b0101111); +} + +// RVB Extension Instructions + +void Assembler::ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0000100, rs2, rs1, 0b000, rd, 0b0111011); +} + +void Assembler::ANDN(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0100000, rs2, rs1, 0b111, rd, 0b0110011); +} + +void Assembler::BCLR(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0100100, rs2, rs1, 0b001, rd, 0b0110011); +} + +void Assembler::BCLRI(GPR rd, GPR rs, uint32_t bit) noexcept { + if (IsRV32(m_features)) { + BISCUIT_ASSERT(bit <= 31); + } else { + BISCUIT_ASSERT(bit <= 63); + } + + const auto imm = (0b010010U << 6) | bit; + 
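+    // Zbs immediates pack funct6 (0b010010 for BCLRI) into imm[11:6] and the
+    // bit index into imm[5:0]; e.g. bit = 5 encodes as imm = 0b010010000101.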
+    EmitIType(m_buffer, imm, rs, 0b001, rd, 0b0010011);
+}
+
+void Assembler::BEXT(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0100100, rs2, rs1, 0b101, rd, 0b0110011);
+}
+
+void Assembler::BEXTI(GPR rd, GPR rs, uint32_t bit) noexcept {
+    if (IsRV32(m_features)) {
+        BISCUIT_ASSERT(bit <= 31);
+    } else {
+        BISCUIT_ASSERT(bit <= 63);
+    }
+
+    const auto imm = (0b010010U << 6) | bit;
+    EmitIType(m_buffer, imm, rs, 0b101, rd, 0b0010011);
+}
+
+void Assembler::BINV(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0110100, rs2, rs1, 0b001, rd, 0b0110011);
+}
+
+void Assembler::BINVI(GPR rd, GPR rs, uint32_t bit) noexcept {
+    if (IsRV32(m_features)) {
+        BISCUIT_ASSERT(bit <= 31);
+    } else {
+        BISCUIT_ASSERT(bit <= 63);
+    }
+
+    const auto imm = (0b011010U << 6) | bit;
+    EmitIType(m_buffer, imm, rs, 0b001, rd, 0b0010011);
+}
+
+void Assembler::BREV8(GPR rd, GPR rs) noexcept {
+    EmitIType(m_buffer, 0b011010000111, rs, 0b101, rd, 0b0010011);
+}
+
+void Assembler::BSET(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b001, rd, 0b0110011);
+}
+
+void Assembler::BSETI(GPR rd, GPR rs, uint32_t bit) noexcept {
+    if (IsRV32(m_features)) {
+        BISCUIT_ASSERT(bit <= 31);
+    } else {
+        BISCUIT_ASSERT(bit <= 63);
+    }
+
+    const auto imm = (0b001010U << 6) | bit;
+    EmitIType(m_buffer, imm, rs, 0b001, rd, 0b0010011);
+}
+
+void Assembler::CLMUL(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b001, rd, 0b0110011);
+}
+
+void Assembler::CLMULH(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b011, rd, 0b0110011);
+}
+
+void Assembler::CLMULR(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b010, rd, 0b0110011);
+}
+
+void Assembler::CLZ(GPR rd, GPR rs) noexcept {
+    EmitIType(m_buffer, 0b011000000000, rs, 0b001, rd, 0b0010011);
+}
+
+void Assembler::CLZW(GPR rd, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    EmitIType(m_buffer, 0b011000000000, rs, 0b001, rd, 0b0011011);
+}
+
+void Assembler::CPOP(GPR rd, GPR rs) noexcept {
+    EmitIType(m_buffer, 0b011000000010, rs, 0b001, rd, 0b0010011);
+}
+
+void Assembler::CPOPW(GPR rd, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    EmitIType(m_buffer, 0b011000000010, rs, 0b001, rd, 0b0011011);
+}
+
+void Assembler::CTZ(GPR rd, GPR rs) noexcept {
+    EmitIType(m_buffer, 0b011000000001, rs, 0b001, rd, 0b0010011);
+}
+
+void Assembler::CTZW(GPR rd, GPR rs) noexcept {
+    BISCUIT_ASSERT(IsRV64(m_features));
+    EmitIType(m_buffer, 0b011000000001, rs, 0b001, rd, 0b0011011);
+}
+
+void Assembler::MAX(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b110, rd, 0b0110011);
+}
+
+void Assembler::MAXU(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b111, rd, 0b0110011);
+}
+
+void Assembler::MIN(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b100, rd, 0b0110011);
+}
+
+void Assembler::MINU(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b101, rd, 0b0110011);
+}
+
+void Assembler::ORCB(GPR rd, GPR rs) noexcept {
+    EmitIType(m_buffer, 0b001010000111, rs, 0b101, rd, 0b0010011);
+}
+
+void Assembler::ORN(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0100000, rs2, rs1, 0b110, rd, 0b0110011);
+}
+
+void Assembler::PACK(GPR rd, GPR rs1, GPR rs2) noexcept {
+    EmitRType(m_buffer, 0b0000100, rs2, rs1, 0b100, rd, 0b0110011);
+}
+
+void Assembler::PACKH(GPR rd, GPR rs1, GPR rs2) noexcept {
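+    // Packs the low bytes of rs1 and rs2 into the low halfword of rd,
+    // zero-extending the result.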
EmitRType(m_buffer, 0b0000100, rs2, rs1, 0b111, rd, 0b0110011); +} + +void Assembler::PACKW(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0000100, rs2, rs1, 0b100, rd, 0b0111011); +} + +void Assembler::REV8(GPR rd, GPR rs) noexcept { + if (IsRV32(m_features)) { + EmitIType(m_buffer, 0b011010011000, rs, 0b101, rd, 0b0010011); + } else { + EmitIType(m_buffer, 0b011010111000, rs, 0b101, rd, 0b0010011); + } +} + +void Assembler::ROL(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0110000, rs2, rs1, 0b001, rd, 0b0110011); +} + +void Assembler::ROLW(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0110000, rs2, rs1, 0b001, rd, 0b0111011); +} + +void Assembler::ROR(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0110000, rs2, rs1, 0b101, rd, 0b0110011); +} + +void Assembler::RORI(GPR rd, GPR rs, uint32_t rotate_amount) noexcept { + BISCUIT_ASSERT(rotate_amount <= 63); + const auto imm = (0b011000U << 6) | rotate_amount; + EmitIType(m_buffer, imm, rs, 0b101, rd, 0b0010011); +} + +void Assembler::RORIW(GPR rd, GPR rs, uint32_t rotate_amount) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + BISCUIT_ASSERT(rotate_amount <= 63); + const auto imm = (0b011000U << 6) | rotate_amount; + EmitIType(m_buffer, imm, rs, 0b101, rd, 0b0011011); +} + +void Assembler::RORW(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0110000, rs2, rs1, 0b101, rd, 0b0111011); +} + +void Assembler::SEXTB(GPR rd, GPR rs) noexcept { + EmitIType(m_buffer, 0b011000000100, rs, 0b001, rd, 0b0010011); +} + +void Assembler::SEXTH(GPR rd, GPR rs) noexcept { + EmitIType(m_buffer, 0b011000000101, rs, 0b001, rd, 0b0010011); +} + +void Assembler::SH1ADD(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b010, rd, 0b0110011); +} + +void Assembler::SH1ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b010, rd, 0b0111011); +} + +void Assembler::SH2ADD(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b100, rd, 0b0110011); +} + +void Assembler::SH2ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b100, rd, 0b0111011); +} + +void Assembler::SH3ADD(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b110, rd, 0b0110011); +} + +void Assembler::SH3ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b110, rd, 0b0111011); +} + +void Assembler::SLLIUW(GPR rd, GPR rs, uint32_t shift_amount) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + BISCUIT_ASSERT(shift_amount <= 63); + const auto imm = (0b000010U << 6) | shift_amount; + EmitIType(m_buffer, imm, rs, 0b001, rd, 0b0011011); +} + +void Assembler::UNZIP(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitIType(m_buffer, 0b000010011111, rs, 0b101, rd, 0b0010011); +} + +void Assembler::XNOR(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0100000, rs2, rs1, 0b100, rd, 0b0110011); +} + +void Assembler::XPERM4(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b010, rd, 0b0110011); +} + +void Assembler::XPERM8(GPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b100, rd, 0b0110011); +} + +void Assembler::ZEXTH(GPR rd, GPR rs) 
noexcept { + if (IsRV32(m_features)) { + EmitIType(m_buffer, 0b000010000000, rs, 0b100, rd, 0b0110011); + } else { + EmitIType(m_buffer, 0b000010000000, rs, 0b100, rd, 0b0111011); + } +} + +void Assembler::ZEXTW(GPR rd, GPR rs) noexcept { + ADDUW(rd, rs, x0); +} + +void Assembler::ZIP(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitIType(m_buffer, 0b000010011110, rs, 0b001, rd, 0b0010011); +} + +// Cache Management Operation Extension Instructions + +void Assembler::CBO_CLEAN(GPR rs) noexcept { + EmitRType(m_buffer, 0b0000000, x1, rs, 0b010, x0, 0b0001111); +} + +void Assembler::CBO_FLUSH(GPR rs) noexcept { + EmitRType(m_buffer, 0b0000000, x2, rs, 0b010, x0, 0b0001111); +} + +void Assembler::CBO_INVAL(GPR rs) noexcept { + EmitRType(m_buffer, 0b0000000, x0, rs, 0b010, x0, 0b0001111); +} + +void Assembler::CBO_ZERO(GPR rs) noexcept { + EmitRType(m_buffer, 0b0000000, x4, rs, 0b010, x0, 0b0001111); +} + +void Assembler::PREFETCH_I(GPR rs, int32_t offset) noexcept { + // Offset must be able to fit in a 12-bit signed immediate and be + // cleanly divisible by 32 since the bottom 5 bits are encoded as zero. + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + BISCUIT_ASSERT(offset % 32 == 0); + EmitIType(m_buffer, static_cast(offset), rs, 0b110, x0, 0b0010011); +} + +void Assembler::PREFETCH_R(GPR rs, int32_t offset) noexcept { + // Offset must be able to fit in a 12-bit signed immediate and be + // cleanly divisible by 32 since the bottom 5 bits are encoded as zero. + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + BISCUIT_ASSERT(offset % 32 == 0); + EmitIType(m_buffer, static_cast(offset) | 0b01, rs, 0b110, x0, 0b0010011); +} + +void Assembler::PREFETCH_W(GPR rs, int32_t offset) noexcept { + // Offset must be able to fit in a 12-bit signed immediate and be + // cleanly divisible by 32 since the bottom 5 bits are encoded as zero. 
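+    //
+    // As a sketch of the encoding (per the Zicbop spec): the offset's bits [11:5]
+    // land in imm[11:5] of the I-type immediate, while the low five immediate bits
+    // select the operation (0b00000 = prefetch.i, 0b00001 = prefetch.r,
+    // 0b00011 = prefetch.w), which is why PREFETCH_R and PREFETCH_W OR in
+    // 0b01 and 0b11 respectively.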
+ BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + BISCUIT_ASSERT(offset % 32 == 0); + EmitIType(m_buffer, static_cast(offset) | 0b11, rs, 0b110, x0, 0b0010011); +} + +// Privileged Instructions + +void Assembler::HFENCE_GVMA(GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0110001, rs2, rs1, 0b000, x0, 0b1110011); +} + +void Assembler::HFENCE_VVMA(GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0010001, rs2, rs1, 0b000, x0, 0b1110011); +} + +void Assembler::HINVAL_GVMA(GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0110011, rs2, rs1, 0b000, x0, 0b1110011); +} + +void Assembler::HINVAL_VVMA(GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0010011, rs2, rs1, 0b000, x0, 0b1110011); +} + +void Assembler::HLV_B(GPR rd, GPR rs) noexcept { + EmitRType(m_buffer, 0b0110000, x0, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HLV_BU(GPR rd, GPR rs) noexcept { + EmitRType(m_buffer, 0b0110000, x1, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HLV_D(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0110110, x0, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HLV_H(GPR rd, GPR rs) noexcept { + EmitRType(m_buffer, 0b0110010, x0, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HLV_HU(GPR rd, GPR rs) noexcept { + EmitRType(m_buffer, 0b0110010, x1, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HLV_W(GPR rd, GPR rs) noexcept { + EmitRType(m_buffer, 0b0110100, x0, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HLV_WU(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0110100, x1, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HLVX_HU(GPR rd, GPR rs) noexcept { + EmitRType(m_buffer, 0b0110010, x3, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HLVX_WU(GPR rd, GPR rs) noexcept { + EmitRType(m_buffer, 0b0110100, x3, rs, 0b100, rd, 0b1110011); +} + +void Assembler::HSV_B(GPR rs2, GPR rs1) noexcept { + EmitRType(m_buffer, 0b0110001, rs2, rs1, 0b100, x0, 0b1110011); +} + +void Assembler::HSV_D(GPR rs2, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b0110111, rs2, rs1, 0b100, x0, 0b1110011); +} + +void Assembler::HSV_H(GPR rs2, GPR rs1) noexcept { + EmitRType(m_buffer, 0b0110011, rs2, rs1, 0b100, x0, 0b1110011); +} + +void Assembler::HSV_W(GPR rs2, GPR rs1) noexcept { + EmitRType(m_buffer, 0b0110101, rs2, rs1, 0b100, x0, 0b1110011); +} + +void Assembler::MRET() noexcept { + m_buffer.Emit32(0x30200073); +} + +void Assembler::SFENCE_INVAL_IR() noexcept { + m_buffer.Emit32(0x18100073U); +} + +void Assembler::SFENCE_VMA(GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0001001, rs2, rs1, 0b000, x0, 0b1110011); +} + +void Assembler::SFENCE_W_INVAL() noexcept { + m_buffer.Emit32(0x18000073U); +} + +void Assembler::SINVAL_VMA(GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b0001011, rs2, rs1, 0b000, x0, 0b1110011); +} + +void Assembler::SRET() noexcept { + m_buffer.Emit32(0x10200073); +} + +void Assembler::URET() noexcept { + m_buffer.Emit32(0x00200073); +} + +void Assembler::WFI() noexcept { + m_buffer.Emit32(0x10500073); +} + +void Assembler::BindToOffset(Label* label, Label::LocationOffset offset) { + BISCUIT_ASSERT(label != nullptr); + BISCUIT_ASSERT(offset >= 0 && offset <= m_buffer.GetCursorOffset()); + + label->Bind(offset); + ResolveLabelOffsets(label); + label->ClearOffsets(); +} + +ptrdiff_t Assembler::LinkAndGetOffset(Label* label) { + BISCUIT_ASSERT(label != nullptr); + + // If we have a bound label, then it's straightforward to calculate + // the offsets. 
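+    // Since BindToOffset only allows binding at or before the current cursor,
+    // the offset computed for a bound label here is always zero or negative,
+    // i.e. a backward branch.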
+    if (label->IsBound()) {
+        const auto cursor_address = m_buffer.GetCursorAddress();
+        const auto label_offset = m_buffer.GetOffsetAddress(*label->GetLocation());
+        return static_cast<ptrdiff_t>(label_offset - cursor_address);
+    }
+
+    // If we don't have a bound location, we return an offset of zero.
+    // While the emitter will emit a bogus branch instruction initially,
+    // the offset will be patched over once the label has been properly
+    // bound to a location.
+    label->AddOffset(m_buffer.GetCursorOffset());
+    return 0;
+}
+
+void Assembler::ResolveLabelOffsets(Label* label) {
+    // Conditional branch instructions make use of the B-type immediate encoding for offsets.
+    const auto is_b_type = [](uint32_t instruction) {
+        return (instruction & 0x7F) == 0b1100011;
+    };
+    // JAL makes use of the J-type immediate encoding for offsets.
+    const auto is_j_type = [](uint32_t instruction) {
+        return (instruction & 0x7F) == 0b1101111;
+    };
+    // C.BEQZ and C.BNEZ make use of this encoding type.
+    const auto is_cb_type = [](uint32_t instruction) {
+        const auto op = instruction & 0b11;
+        const auto funct3 = instruction & 0xE000;
+        return op == 0b01 && funct3 >= 0xC000;
+    };
+    // C.JAL and C.J make use of this encoding type.
+    const auto is_cj_type = [](uint32_t instruction) {
+        const auto op = instruction & 0b11;
+        const auto funct3 = instruction & 0xE000;
+        return op == 0b01 && (funct3 == 0x2000 || funct3 == 0xA000);
+    };
+    // If we know an instruction is a compressed branch, then it's a 16-bit instruction.
+    // Otherwise it's a regular-sized 32-bit instruction.
+    const auto determine_inst_size = [&](uint32_t instruction) -> size_t {
+        if (is_cj_type(instruction) || is_cb_type(instruction)) {
+            return 2;
+        } else {
+            return 4;
+        }
+    };
+
+    const auto label_location = *label->GetLocation();
+
+    for (const auto offset : label->m_offsets) {
+        const auto address = m_buffer.GetOffsetAddress(offset);
+        auto* const ptr = reinterpret_cast<uint8_t*>(address);
+        const auto inst_size = determine_inst_size(uint32_t{*ptr} | (uint32_t{*(ptr + 1)} << 8));
+
+        uint32_t instruction = 0;
+        std::memcpy(&instruction, ptr, inst_size);
+
+        // Given all branch instructions we need to patch have 0 encoded as
+        // their branch offset, we don't need to worry about any masking work.
+        //
+        // It's enough to verify that the immediate is going to be valid
+        // and then OR it into the instruction.
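+        //
+        // As a worked example (hypothetical offsets): a BEQ emitted at buffer
+        // offset 8 against a not-yet-bound label records offset 8 on the label;
+        // once the label is bound at offset 24, resolution computes
+        // encoded_offset = 24 - 8 = 16 and ORs TransformToBTypeImm(16) into
+        // the zero-immediate branch below.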
+
+        const auto encoded_offset = label_location - offset;
+
+        if (inst_size == sizeof(uint32_t)) {
+            if (is_b_type(instruction)) {
+                BISCUIT_ASSERT(IsValidBTypeImm(encoded_offset));
+                instruction |= TransformToBTypeImm(static_cast<uint32_t>(encoded_offset));
+            } else if (is_j_type(instruction)) {
+                BISCUIT_ASSERT(IsValidJTypeImm(encoded_offset));
+                instruction |= TransformToJTypeImm(static_cast<uint32_t>(encoded_offset));
+            }
+        } else {
+            if (is_cb_type(instruction)) {
+                BISCUIT_ASSERT(IsValidCBTypeImm(encoded_offset));
+                instruction |= TransformToCBTypeImm(static_cast<uint32_t>(encoded_offset));
+            } else if (is_cj_type(instruction)) {
+                BISCUIT_ASSERT(IsValidCJTypeImm(encoded_offset));
+                instruction |= TransformToCJTypeImm(static_cast<uint32_t>(encoded_offset));
+            }
+        }
+
+        std::memcpy(ptr, &instruction, inst_size);
+    }
+}
+
+} // namespace biscuit
diff --git a/src/assembler_compressed.cpp b/src/assembler_compressed.cpp
new file mode 100644
index 00000000..cca0b42e
--- /dev/null
+++ b/src/assembler_compressed.cpp
@@ -0,0 +1,696 @@
+#include <biscuit/assembler.hpp>
+#include <biscuit/assert.hpp>
+
+#include <array>
+#include <cstdlib>
+
+#include "assembler_util.hpp"
+
+// RVC Extension Instructions
+
+namespace biscuit {
+namespace {
+// Emits a compressed branch instruction. These consist of:
+// funct3 | imm[8|4:3] | rs | imm[7:6|2:1|5] | op
+void EmitCompressedBranch(CodeBuffer& buffer, uint32_t funct3, int32_t offset, GPR rs, uint32_t op) {
+    BISCUIT_ASSERT(IsValidCBTypeImm(offset));
+    BISCUIT_ASSERT(IsValid3BitCompressedReg(rs));
+
+    const auto transformed_imm = TransformToCBTypeImm(static_cast<uint32_t>(offset));
+    const auto rs_san = CompressedRegTo3BitEncoding(rs);
+    buffer.Emit16(((funct3 & 0b111) << 13) | transformed_imm | (rs_san << 7) | (op & 0b11));
+}
+
+// Emits a compressed jump instruction. These consist of:
+// funct3 | imm | op
+void EmitCompressedJump(CodeBuffer& buffer, uint32_t funct3, int32_t offset, uint32_t op) {
+    BISCUIT_ASSERT(IsValidCJTypeImm(offset));
+    BISCUIT_ASSERT((offset % 2) == 0);
+
+    buffer.Emit16(TransformToCJTypeImm(static_cast<uint32_t>(offset)) |
+                  ((funct3 & 0b111) << 13) | (op & 0b11));
+}
+
+// Emits a compressed immediate instruction. These consist of:
+// funct3 | imm | rd | imm | op
+void EmitCompressedImmediate(CodeBuffer& buffer, uint32_t funct3, uint32_t imm, GPR rd, uint32_t op) {
+    BISCUIT_ASSERT(rd != x0);
+
+    const auto new_imm = ((imm & 0b11111) << 2) | ((imm & 0b100000) << 7);
+    buffer.Emit16(((funct3 & 0b111) << 13) | new_imm | (rd.Index() << 7) | (op & 0b11));
+}
+
+// Emits a compressed load instruction. These consist of:
+// funct3 | imm | rs1 | imm | rd | op
+void EmitCompressedLoad(CodeBuffer& buffer, uint32_t funct3, uint32_t imm, GPR rs,
+                        Register rd, uint32_t op) {
+    BISCUIT_ASSERT(IsValid3BitCompressedReg(rs));
+    BISCUIT_ASSERT(IsValid3BitCompressedReg(rd));
+
+    imm &= 0xF8;
+
+    const auto imm_enc = ((imm & 0x38) << 7) | ((imm & 0xC0) >> 1);
+    const auto rd_san = CompressedRegTo3BitEncoding(rd);
+    const auto rs_san = CompressedRegTo3BitEncoding(rs);
+    buffer.Emit16(((funct3 & 0b111) << 13) | imm_enc | (rs_san << 7) | (rd_san << 2) | (op & 0b11));
+}
+
+// Emits a compressed register arithmetic instruction.
These consist of: +// funct6 | rd | funct2 | rs | op +void EmitCompressedRegArith(CodeBuffer& buffer, uint32_t funct6, GPR rd, uint32_t funct2, + GPR rs, uint32_t op) { + BISCUIT_ASSERT(IsValid3BitCompressedReg(rs)); + BISCUIT_ASSERT(IsValid3BitCompressedReg(rd)); + + const auto rd_san = CompressedRegTo3BitEncoding(rd); + const auto rs_san = CompressedRegTo3BitEncoding(rs); + buffer.Emit16(((funct6 & 0b111111) << 10) | (rd_san << 7) | ((funct2 & 0b11) << 5) | + (rs_san << 2) | (op & 0b11)); +} + +// Emits a compressed store instruction. These consist of: +// funct3 | imm | rs1 | imm | rs2 | op +void EmitCompressedStore(CodeBuffer& buffer, uint32_t funct3, uint32_t imm, GPR rs1, + Register rs2, uint32_t op) { + // This has the same format as a compressed load, with rs2 taking the place of rd. + // We can reuse the code we've already written to handle this. + EmitCompressedLoad(buffer, funct3, imm, rs1, rs2, op); +} + +// Emits a compressed wide immediate instruction. These consist of: +// funct3 | imm | rd | opcode +void EmitCompressedWideImmediate(CodeBuffer& buffer, uint32_t funct3, uint32_t imm, + GPR rd, uint32_t op) { + BISCUIT_ASSERT(IsValid3BitCompressedReg(rd)); + + const auto rd_sanitized = CompressedRegTo3BitEncoding(rd); + buffer.Emit16(((funct3 & 0b111) << 13) | ((imm & 0xFF) << 5) | + (rd_sanitized << 2) | (op & 0b11)); +} + +void EmitCLBType(CodeBuffer& buffer, uint32_t funct6, GPR rs, uint32_t uimm, GPR rd, + uint32_t op, uint32_t b6) { + BISCUIT_ASSERT(IsValid3BitCompressedReg(rs)); + BISCUIT_ASSERT(IsValid3BitCompressedReg(rd)); + BISCUIT_ASSERT(uimm <= 3); + + const auto rd_san = CompressedRegTo3BitEncoding(rd); + const auto rs_san = CompressedRegTo3BitEncoding(rs); + + buffer.Emit16((funct6 << 10) | (rs_san << 7) | (b6 << 6) | (uimm << 5) | (rd_san << 2) | op); +} + +void EmitCLHType(CodeBuffer& buffer, uint32_t funct6, GPR rs, uint32_t uimm, GPR rd, + uint32_t op, uint32_t b6) { + BISCUIT_ASSERT((uimm % 2) == 0); + BISCUIT_ASSERT(uimm <= 2); + + // Only have 1 bit of encoding space for the immediate. + const uint32_t uimm_fixed = uimm >> 1; + EmitCLBType(buffer, funct6, rs, uimm_fixed, rd, op, b6); +} + +// These have the same layout as the equivalent loads, we just essentially alias +// the name of those to provide better intent at the call site. 
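+// (This works because the CS-type layout places rs2' in bits [4:2], the same
+// position rd' occupies in the CL-type layout.)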
+void EmitCSBType(CodeBuffer& buffer, uint32_t funct6, GPR rs, uint32_t uimm, GPR rd, uint32_t op) {
+    EmitCLBType(buffer, funct6, rs, uimm, rd, op, 0);
+}
+void EmitCSHType(CodeBuffer& buffer, uint32_t funct6, GPR rs, uint32_t uimm, GPR rd, uint32_t op) {
+    EmitCLHType(buffer, funct6, rs, uimm, rd, op, 0);
+}
+
+void EmitCUType(CodeBuffer& buffer, uint32_t funct6, GPR rd, uint32_t funct5, uint32_t op) {
+    BISCUIT_ASSERT(IsValid3BitCompressedReg(rd));
+    const auto rd_san = CompressedRegTo3BitEncoding(rd);
+
+    buffer.Emit16((funct6 << 10) | (rd_san << 7) | (funct5 << 2) | op);
+}
+
+void EmitCMJTType(CodeBuffer& buffer, uint32_t funct6, uint32_t index, uint32_t op) {
+    buffer.Emit16((funct6 << 10) | (index << 2) | op);
+}
+
+void EmitCMMVType(CodeBuffer& buffer, uint32_t funct6, GPR r1s, uint32_t funct2, GPR r2s, uint32_t op) {
+    const auto is_valid_s_register = [](GPR reg) {
+        return reg == s0 || reg == s1 || (reg >= s2 && reg <= s7);
+    };
+
+    BISCUIT_ASSERT(r1s != r2s);
+    BISCUIT_ASSERT(is_valid_s_register(r1s));
+    BISCUIT_ASSERT(is_valid_s_register(r2s));
+
+    const auto r1s_san = r1s.Index() & 0b111;
+    const auto r2s_san = r2s.Index() & 0b111;
+
+    buffer.Emit16((funct6 << 10) | (r1s_san << 7) | (funct2 << 5) | (r2s_san << 2) | op);
+}
+
+void EmitCMPPType(CodeBuffer& buffer, uint32_t funct6, uint32_t funct2, PushPopList reglist,
+                  int32_t stack_adj, uint32_t op, ArchFeature feature) {
+    BISCUIT_ASSERT(stack_adj % 16 == 0);
+
+    static constexpr std::array<uint32_t, 16> stack_adj_bases_rv32{
+        0U,  0U,  0U,  0U,  16U, 16U, 16U, 16U,
+        32U, 32U, 32U, 32U, 48U, 48U, 48U, 64U,
+    };
+    static constexpr std::array<uint32_t, 16> stack_adj_bases_rv64{
+        0U,  0U,  0U,  0U,  16U, 16U, 32U, 32U,
+        48U, 48U, 64U, 64U, 80U, 80U, 96U, 112U
+    };
+
+    const auto bitmask = reglist.GetBitmask();
+    const auto stack_adj_base = IsRV64(feature) ? stack_adj_bases_rv64[bitmask]
+                                                : stack_adj_bases_rv32[bitmask];
+    const auto stack_adj_u = static_cast<uint32_t>(std::abs(stack_adj));
+    const auto spimm = (stack_adj_u - stack_adj_base) / 16U;
+
+    // The two spimm bits can only encode four different stack adjustments
+    // (the base plus 0, 16, 32, or 48 bytes).
+    // Ensure we catch any cases where we end up going outside of them.
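+    //
+    // As an illustrative example (assuming GetBitmask() returns the 4-bit rlist
+    // encoding, where 6 means {ra, s0, s1}): on RV64 that bitmask selects a
+    // stack_adj_base of 32, so a stack_adj magnitude of 48 encodes as
+    // spimm = (48 - 32) / 16 = 1.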
+ BISCUIT_ASSERT(stack_adj_u == stack_adj_base || + stack_adj_u == stack_adj_base + 16 || + stack_adj_u == stack_adj_base + 32 || + stack_adj_u == stack_adj_base + 48); + + buffer.Emit16((funct6 << 10) | (funct2 << 8) | (bitmask << 4) | (spimm << 2) | op); +} +} // Anonymous namespace + +void Assembler::C_ADD(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(rs != x0); + m_buffer.Emit16(0x9002 | (rd.Index() << 7) | (rs.Index() << 2)); +} + +void Assembler::C_ADDI(GPR rd, int32_t imm) noexcept { + BISCUIT_ASSERT(imm != 0); + BISCUIT_ASSERT(IsValidSigned6BitImm(imm)); + EmitCompressedImmediate(m_buffer, 0b000, static_cast(imm), rd, 0b01); +} + +void Assembler::C_ADDIW(GPR rd, int32_t imm) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + BISCUIT_ASSERT(IsValidSigned6BitImm(imm)); + EmitCompressedImmediate(m_buffer, 0b001, static_cast(imm), rd, 0b01); +} + +void Assembler::C_ADDI4SPN(GPR rd, uint32_t imm) noexcept { + BISCUIT_ASSERT(imm != 0); + BISCUIT_ASSERT(imm <= 1020); + BISCUIT_ASSERT(imm % 4 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x030) << 2) | + ((imm & 0x3C0) >> 4) | + ((imm & 0x004) >> 1) | + ((imm & 0x008) >> 3); + // clang-format on + + EmitCompressedWideImmediate(m_buffer, 0b000, new_imm, rd, 0b00); +} + +void Assembler::C_ADDW(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + EmitCompressedRegArith(m_buffer, 0b100111, rd, 0b01, rs, 0b01); +} + +void Assembler::C_ADDI16SP(int32_t imm) noexcept { + BISCUIT_ASSERT(imm != 0); + BISCUIT_ASSERT(imm >= -512 && imm <= 496); + BISCUIT_ASSERT(imm % 16 == 0); + + // clang-format off + const auto uimm = static_cast(imm); + const auto new_imm = ((uimm & 0x020) >> 3) | + ((uimm & 0x180) >> 4) | + ((uimm & 0x040) >> 1) | + ((uimm & 0x010) << 2) | + ((uimm & 0x200) << 3); + // clang-format on + + m_buffer.Emit16(0x6000U | new_imm | (x2.Index() << 7) | 0b01U); +} + +void Assembler::C_AND(GPR rd, GPR rs) noexcept { + EmitCompressedRegArith(m_buffer, 0b100011, rd, 0b11, rs, 0b01); +} + +void Assembler::C_ANDI(GPR rd, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsValid3BitCompressedReg(rd)); + + constexpr auto base = 0x8801U; + const auto shift_enc = ((imm & 0b11111) << 2) | ((imm & 0b100000) << 7); + const auto reg = CompressedRegTo3BitEncoding(rd); + + m_buffer.Emit16(base | shift_enc | (reg << 7)); +} + +void Assembler::C_BEQZ(GPR rs, int32_t offset) noexcept { + EmitCompressedBranch(m_buffer, 0b110, offset, rs, 0b01); +} + +void Assembler::C_BEQZ(GPR rs, Label* label) noexcept { + const auto address = LinkAndGetOffset(label); + C_BEQZ(rs, static_cast(address)); +} + +void Assembler::C_BNEZ(GPR rs, int32_t offset) noexcept { + EmitCompressedBranch(m_buffer, 0b111, offset, rs, 0b01); +} + +void Assembler::C_BNEZ(GPR rs, Label* label) noexcept { + const auto address = LinkAndGetOffset(label); + C_BNEZ(rs, static_cast(address)); +} + +void Assembler::C_EBREAK() noexcept { + m_buffer.Emit16(0x9002); +} + +void Assembler::C_FLD(FPR rd, uint32_t imm, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV32OrRV64(m_features)); + BISCUIT_ASSERT(imm <= 248); + BISCUIT_ASSERT(imm % 8 == 0); + + EmitCompressedLoad(m_buffer, 0b001, imm, rs, rd, 0b00); +} + +void Assembler::C_FLDSP(FPR rd, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsRV32OrRV64(m_features)); + BISCUIT_ASSERT(imm <= 504); + BISCUIT_ASSERT(imm % 8 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x018) << 2) | + ((imm & 0x1C0) >> 4) | + ((imm & 0x020) << 7); + // clang-format on + + m_buffer.Emit16(0x2002U | (rd.Index() << 7) | new_imm); +} + +void 
Assembler::C_FLW(FPR rd, uint32_t imm, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + BISCUIT_ASSERT(imm <= 124); + BISCUIT_ASSERT(imm % 4 == 0); + + imm &= 0x7C; + const auto new_imm = ((imm & 0b0100) << 5) | (imm & 0x78); + EmitCompressedLoad(m_buffer, 0b011, new_imm, rs, rd, 0b00); +} + +void Assembler::C_FLWSP(FPR rd, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + BISCUIT_ASSERT(imm <= 252); + BISCUIT_ASSERT(imm % 4 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x020) << 7) | + ((imm & 0x0C0) >> 4) | + ((imm & 0x01C) << 2); + // clang-format on + + m_buffer.Emit16(0x6002U | (rd.Index() << 7) | new_imm); +} + +void Assembler::C_FSD(FPR rs2, uint32_t imm, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV32OrRV64(m_features)); + BISCUIT_ASSERT(imm <= 248); + BISCUIT_ASSERT(imm % 8 == 0); + + EmitCompressedStore(m_buffer, 0b101, imm, rs1, rs2, 0b00); +} + +void Assembler::C_FSDSP(FPR rs, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsRV32OrRV64(m_features)); + BISCUIT_ASSERT(imm <= 504); + BISCUIT_ASSERT(imm % 8 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x038) << 7) | + ((imm & 0x1C0) << 1); + // clang-format on + + m_buffer.Emit16(0xA002U | (rs.Index() << 2) | new_imm); +} + +void Assembler::C_J(Label* label) noexcept { + const auto address = LinkAndGetOffset(label); + C_J(static_cast(address)); +} + +void Assembler::C_J(int32_t offset) noexcept { + EmitCompressedJump(m_buffer, 0b101, offset, 0b01); +} + +void Assembler::C_JAL(Label* label) noexcept { + const auto address = LinkAndGetOffset(label); + C_JAL(static_cast(address)); +} + +void Assembler::C_JAL(int32_t offset) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitCompressedJump(m_buffer, 0b001, offset, 0b01); +} + +void Assembler::C_FSW(FPR rs2, uint32_t imm, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + BISCUIT_ASSERT(imm <= 124); + BISCUIT_ASSERT(imm % 4 == 0); + + imm &= 0x7C; + const auto new_imm = ((imm & 0b0100) << 5) | (imm & 0x78); + EmitCompressedStore(m_buffer, 0b111, new_imm, rs1, rs2, 0b00); +} + +void Assembler::C_FSWSP(FPR rs, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + BISCUIT_ASSERT(imm <= 252); + BISCUIT_ASSERT(imm % 4 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x0C0) << 1) | + ((imm & 0x03C) << 7); + // clang-format on + + m_buffer.Emit16(0xE002U | (rs.Index() << 2) | new_imm); +} + +void Assembler::C_JALR(GPR rs) noexcept { + BISCUIT_ASSERT(rs != x0); + m_buffer.Emit16(0x9002 | (rs.Index() << 7)); +} + +void Assembler::C_JR(GPR rs) noexcept { + BISCUIT_ASSERT(rs != x0); + m_buffer.Emit16(0x8002 | (rs.Index() << 7)); +} + +void Assembler::C_LD(GPR rd, uint32_t imm, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + BISCUIT_ASSERT(imm <= 248); + BISCUIT_ASSERT(imm % 8 == 0); + + EmitCompressedLoad(m_buffer, 0b011, imm, rs, rd, 0b00); +} + +void Assembler::C_LDSP(GPR rd, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + BISCUIT_ASSERT(rd != x0); + BISCUIT_ASSERT(imm <= 504); + BISCUIT_ASSERT(imm % 8 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x018) << 2) | + ((imm & 0x1C0) >> 4) | + ((imm & 0x020) << 7); + // clang-format on + + m_buffer.Emit16(0x6002U | (rd.Index() << 7) | new_imm); +} + +void Assembler::C_LI(GPR rd, int32_t imm) noexcept { + BISCUIT_ASSERT(IsValidSigned6BitImm(imm)); + EmitCompressedImmediate(m_buffer, 0b010, static_cast(imm), rd, 0b01); +} + +void Assembler::C_LQ(GPR rd, uint32_t imm, GPR rs) noexcept { + 
BISCUIT_ASSERT(IsRV128(m_features)); + BISCUIT_ASSERT(imm <= 496); + BISCUIT_ASSERT(imm % 16 == 0); + + imm &= 0x1F0; + const auto new_imm = ((imm & 0x100) >> 5) | (imm & 0xF0); + EmitCompressedLoad(m_buffer, 0b001, new_imm, rs, rd, 0b00); +} + +void Assembler::C_LQSP(GPR rd, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsRV128(m_features)); + BISCUIT_ASSERT(rd != x0); + BISCUIT_ASSERT(imm <= 1008); + BISCUIT_ASSERT(imm % 16 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x020) << 7) | + ((imm & 0x010) << 2) | + ((imm & 0x3C0) >> 4); + // clang-format on + + m_buffer.Emit16(0x2002U | (rd.Index() << 7) | new_imm); +} + +void Assembler::C_LUI(GPR rd, uint32_t imm) noexcept { + BISCUIT_ASSERT(imm != 0); + BISCUIT_ASSERT(rd != x0 && rd != x2); + + const auto new_imm = (imm & 0x3F000) >> 12; + EmitCompressedImmediate(m_buffer, 0b011, new_imm, rd, 0b01); +} + +void Assembler::C_LW(GPR rd, uint32_t imm, GPR rs) noexcept { + BISCUIT_ASSERT(imm <= 124); + BISCUIT_ASSERT(imm % 4 == 0); + + imm &= 0x7C; + const auto new_imm = ((imm & 0b0100) << 5) | (imm & 0x78); + EmitCompressedLoad(m_buffer, 0b010, new_imm, rs, rd, 0b00); +} + +void Assembler::C_LWSP(GPR rd, uint32_t imm) noexcept { + BISCUIT_ASSERT(rd != x0); + BISCUIT_ASSERT(imm <= 252); + BISCUIT_ASSERT(imm % 4 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x020) << 7) | + ((imm & 0x0C0) >> 4) | + ((imm & 0x01C) << 2); + // clang-format on + + m_buffer.Emit16(0x4002U | (rd.Index() << 7) | new_imm); +} + +void Assembler::C_MV(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(rd != x0); + BISCUIT_ASSERT(rs != x0); + m_buffer.Emit16(0x8002 | (rd.Index() << 7) | (rs.Index() << 2)); +} + +void Assembler::C_NOP() noexcept { + m_buffer.Emit16(1); +} + +void Assembler::C_OR(GPR rd, GPR rs) noexcept { + EmitCompressedRegArith(m_buffer, 0b100011, rd, 0b10, rs, 0b01); +} + +void Assembler::C_SD(GPR rs2, uint32_t imm, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + BISCUIT_ASSERT(imm <= 248); + BISCUIT_ASSERT(imm % 8 == 0); + + EmitCompressedLoad(m_buffer, 0b111, imm, rs1, rs2, 0b00); +} + +void Assembler::C_SDSP(GPR rs, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + BISCUIT_ASSERT(imm <= 504); + BISCUIT_ASSERT(imm % 8 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x038) << 7) | + ((imm & 0x1C0) << 1); + // clang-format on + + m_buffer.Emit16(0xE002U | (rs.Index() << 2) | new_imm); +} + +void Assembler::C_SLLI(GPR rd, uint32_t shift) noexcept { + BISCUIT_ASSERT(rd != x0); + BISCUIT_ASSERT(IsValidCompressedShiftAmount(shift)); + + // RV128C encodes a 64-bit shift with an encoding of 0. 
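+    // The encoding below places shift[4:0] in bits [6:2] and shift[5] in
+    // bit 12, matching the CI-format layout used by the compressed shifts.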
+ if (shift == 64) { + BISCUIT_ASSERT(IsRV128(m_features)); + shift = 0; + } + + const auto shift_enc = ((shift & 0b11111) << 2) | ((shift & 0b100000) << 7); + m_buffer.Emit16(0x0002U | shift_enc | (rd.Index() << 7)); +} + +void Assembler::C_SQ(GPR rs2, uint32_t imm, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV128(m_features)); + BISCUIT_ASSERT(imm <= 496); + BISCUIT_ASSERT(imm % 16 == 0); + + imm &= 0x1F0; + const auto new_imm = ((imm & 0x100) >> 5) | (imm & 0xF0); + EmitCompressedStore(m_buffer, 0b101, new_imm, rs1, rs2, 0b00); +} + +void Assembler::C_SQSP(GPR rs, uint32_t imm) noexcept { + BISCUIT_ASSERT(IsRV128(m_features)); + BISCUIT_ASSERT(imm <= 1008); + BISCUIT_ASSERT(imm % 16 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x3C0) << 1) | + ((imm & 0x030) << 7); + // clang-format on + + m_buffer.Emit16(0xA002U | (rs.Index() << 2) | new_imm); +} + +void Assembler::C_SRAI(GPR rd, uint32_t shift) noexcept { + BISCUIT_ASSERT(IsValid3BitCompressedReg(rd)); + BISCUIT_ASSERT(IsValidCompressedShiftAmount(shift)); + + // RV128C encodes a 64-bit shift with an encoding of 0. + if (shift == 64) { + BISCUIT_ASSERT(IsRV128(m_features)); + shift = 0; + } + + constexpr auto base = 0x8401U; + const auto shift_enc = ((shift & 0b11111) << 2) | ((shift & 0b100000) << 7); + const auto reg = CompressedRegTo3BitEncoding(rd); + + m_buffer.Emit16(base | shift_enc | (reg << 7)); +} + +void Assembler::C_SRLI(GPR rd, uint32_t shift) noexcept { + BISCUIT_ASSERT(IsValid3BitCompressedReg(rd)); + BISCUIT_ASSERT(IsValidCompressedShiftAmount(shift)); + + // RV128C encodes a 64-bit shift with an encoding of 0. + if (shift == 64) { + BISCUIT_ASSERT(IsRV128(m_features)); + shift = 0; + } + + constexpr auto base = 0x8001U; + const auto shift_enc = ((shift & 0b11111) << 2) | ((shift & 0b100000) << 7); + const auto reg = CompressedRegTo3BitEncoding(rd); + + m_buffer.Emit16(base | shift_enc | (reg << 7)); +} + +void Assembler::C_SUB(GPR rd, GPR rs) noexcept { + EmitCompressedRegArith(m_buffer, 0b100011, rd, 0b00, rs, 0b01); +} + +void Assembler::C_SUBW(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + EmitCompressedRegArith(m_buffer, 0b100111, rd, 0b00, rs, 0b01); +} + +void Assembler::C_SW(GPR rs2, uint32_t imm, GPR rs1) noexcept { + BISCUIT_ASSERT(imm <= 124); + BISCUIT_ASSERT(imm % 4 == 0); + + imm &= 0x7C; + const auto new_imm = ((imm & 0b0100) << 5) | (imm & 0x78); + EmitCompressedStore(m_buffer, 0b110, new_imm, rs1, rs2, 0b00); +} + +void Assembler::C_SWSP(GPR rs, uint32_t imm) noexcept { + BISCUIT_ASSERT(imm <= 252); + BISCUIT_ASSERT(imm % 4 == 0); + + // clang-format off + const auto new_imm = ((imm & 0x0C0) << 1) | + ((imm & 0x03C) << 7); + // clang-format on + + m_buffer.Emit16(0xC002U | (rs.Index() << 2) | new_imm); +} + +void Assembler::C_UNDEF() noexcept { + m_buffer.Emit16(0); +} + +void Assembler::C_XOR(GPR rd, GPR rs) noexcept { + EmitCompressedRegArith(m_buffer, 0b100011, rd, 0b01, rs, 0b01); +} + +// Zc Extension Instructions + +void Assembler::C_LBU(GPR rd, uint32_t uimm, GPR rs) noexcept { + // C.LBU swaps the ordering of the immediate. 
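+    // That is, uimm bit 0 lands in instruction bit 6 and uimm bit 1 in bit 5.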
+ const auto uimm_fixed = ((uimm & 0b01) << 1) | ((uimm & 0b10) >> 1); + + EmitCLBType(m_buffer, 0b100000, rs, uimm_fixed, rd, 0b00, 0); +} +void Assembler::C_LH(GPR rd, uint32_t uimm, GPR rs) noexcept { + EmitCLHType(m_buffer, 0b100001, rs, uimm, rd, 0b00, 1); +} +void Assembler::C_LHU(GPR rd, uint32_t uimm, GPR rs) noexcept { + EmitCLHType(m_buffer, 0b100001, rs, uimm, rd, 0b00, 0); +} +void Assembler::C_SB(GPR rs2, uint32_t uimm, GPR rs1) noexcept { + // C.SB swaps the ordering of the immediate. + const auto uimm_fixed = ((uimm & 0b01) << 1) | ((uimm & 0b10) >> 1); + + EmitCSBType(m_buffer, 0b100010, rs1, uimm_fixed, rs2, 0b00); +} +void Assembler::C_SH(GPR rs2, uint32_t uimm, GPR rs1) noexcept { + EmitCSHType(m_buffer, 0b100011, rs1, uimm, rs2, 0b00); +} + +void Assembler::C_SEXT_B(GPR rd) noexcept { + EmitCUType(m_buffer, 0b100111, rd, 0b11001, 0b01); +} +void Assembler::C_SEXT_H(GPR rd) noexcept { + EmitCUType(m_buffer, 0b100111, rd, 0b11011, 0b01); +} +void Assembler::C_ZEXT_B(GPR rd) noexcept { + EmitCUType(m_buffer, 0b100111, rd, 0b11000, 0b01); +} +void Assembler::C_ZEXT_H(GPR rd) noexcept { + EmitCUType(m_buffer, 0b100111, rd, 0b11010, 0b01); +} +void Assembler::C_ZEXT_W(GPR rd) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitCUType(m_buffer, 0b100111, rd, 0b11100, 0b01); +} + +void Assembler::C_MUL(GPR rsd, GPR rs2) noexcept { + EmitCompressedRegArith(m_buffer, 0b100111, rsd, 0b10, rs2, 0b01); +} +void Assembler::C_NOT(GPR rd) noexcept { + EmitCUType(m_buffer, 0b100111, rd, 0b11101, 0b01); +} + +void Assembler::CM_JALT(uint32_t index) noexcept { + BISCUIT_ASSERT(index >= 32 && index <= 255); + EmitCMJTType(m_buffer, 0b101000, index, 0b10); +} +void Assembler::CM_JT(uint32_t index) noexcept { + BISCUIT_ASSERT(index <= 31); + EmitCMJTType(m_buffer, 0b101000, index, 0b10); +} + +void Assembler::CM_MVA01S(GPR r1s, GPR r2s) noexcept { + EmitCMMVType(m_buffer, 0b101011, r1s, 0b11, r2s, 0b10); +} +void Assembler::CM_MVSA01(GPR r1s, GPR r2s) noexcept { + EmitCMMVType(m_buffer, 0b101011, r1s, 0b01, r2s, 0b10); +} + +void Assembler::CM_POP(PushPopList reg_list, int32_t stack_adj) noexcept { + BISCUIT_ASSERT(stack_adj > 0); + EmitCMPPType(m_buffer, 0b101110, 0b10, reg_list, stack_adj, 0b10, m_features); +} +void Assembler::CM_POPRET(PushPopList reg_list, int32_t stack_adj) noexcept { + BISCUIT_ASSERT(stack_adj > 0); + EmitCMPPType(m_buffer, 0b101111, 0b10, reg_list, stack_adj, 0b10, m_features); +} +void Assembler::CM_POPRETZ(PushPopList reg_list, int32_t stack_adj) noexcept { + BISCUIT_ASSERT(stack_adj > 0); + EmitCMPPType(m_buffer, 0b101111, 0b00, reg_list, stack_adj, 0b10, m_features); +} +void Assembler::CM_PUSH(PushPopList reg_list, int32_t stack_adj) noexcept { + BISCUIT_ASSERT(stack_adj < 0); + EmitCMPPType(m_buffer, 0b101110, 0b00, reg_list, stack_adj, 0b10, m_features); +} + +} // namespace biscuit diff --git a/src/assembler_crypto.cpp b/src/assembler_crypto.cpp new file mode 100644 index 00000000..8d83bc41 --- /dev/null +++ b/src/assembler_crypto.cpp @@ -0,0 +1,172 @@ +#include +#include + +#include "assembler_util.hpp" + +namespace biscuit { +namespace { +void EmitAES32Instruction(CodeBuffer& buffer, uint32_t op, GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept { + BISCUIT_ASSERT(bs <= 0b11); + buffer.Emit32(op | (bs << 30) | (rs2.Index() << 20) | + (rs1.Index() << 15) | (rd.Index() << 7)); +} + +void EmitSM4Instruction(CodeBuffer& buffer, uint32_t op, GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept { + // Same behavior, function exists for a better contextual name. 
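+    // (As with the AES32 helpers, bs selects which byte of rs2 feeds the
+    // S-box and is encoded in the instruction's top two bits.)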
+ EmitAES32Instruction(buffer, op, rd, rs1, rs2, bs); +} + +void EmitAES64Instruction(CodeBuffer& buffer, uint32_t op, GPR rd, GPR rs1, GPR rs2) noexcept { + buffer.Emit32(op | (rs2.Index() << 20) | (rs1.Index() << 15) | (rd.Index() << 7)); +} + +void EmitSHAInstruction(CodeBuffer& buffer, uint32_t op, GPR rd, GPR rs1, GPR rs2) noexcept { + // Same behavior, function exists for a better contextual name. + EmitAES64Instruction(buffer, op, rd, rs1, rs2); +} + +void EmitSM3Instruction(CodeBuffer& buffer, uint32_t op, GPR rd, GPR rs) noexcept { + // Same behavior, function exists for a better contextual name. + EmitAES64Instruction(buffer, op, rd, rs, x0); +} +} // Anonymous namespace + +void Assembler::AES32DSI(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitAES32Instruction(m_buffer, 0x2A000033, rd, rs1, rs2, bs); +} + +void Assembler::AES32DSMI(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitAES32Instruction(m_buffer, 0x2E000033, rd, rs1, rs2, bs); +} + +void Assembler::AES32ESI(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitAES32Instruction(m_buffer, 0x22000033, rd, rs1, rs2, bs); +} + +void Assembler::AES32ESMI(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitAES32Instruction(m_buffer, 0x26000033, rd, rs1, rs2, bs); +} + +void Assembler::AES64DS(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAES64Instruction(m_buffer, 0x3A000033, rd, rs1, rs2); +} + +void Assembler::AES64DSM(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAES64Instruction(m_buffer, 0x3E000033, rd, rs1, rs2); +} + +void Assembler::AES64ES(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAES64Instruction(m_buffer, 0x32000033, rd, rs1, rs2); +} + +void Assembler::AES64ESM(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAES64Instruction(m_buffer, 0x36000033, rd, rs1, rs2); +} + +void Assembler::AES64IM(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAES64Instruction(m_buffer, 0x30001013, rd, rs, x0); +} + +void Assembler::AES64KS1I(GPR rd, GPR rs, uint32_t rnum) noexcept { + // RVK spec states that rnums 0xB to 0xF are reserved. 
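+    // The round number occupies the rs2 register field of the encoding,
+    // which is why it is wrapped as a GPR in the call below.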
+ BISCUIT_ASSERT(IsRV64(m_features)); + BISCUIT_ASSERT(rnum <= 0xA); + EmitAES64Instruction(m_buffer, 0x31001013, rd, rs, GPR{rnum}); +} + +void Assembler::AES64KS2(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitAES64Instruction(m_buffer, 0x7E000033, rd, rs1, rs2); +} + +void Assembler::SHA256SIG0(GPR rd, GPR rs) noexcept { + EmitSHAInstruction(m_buffer, 0x10201013, rd, rs, x0); +} + +void Assembler::SHA256SIG1(GPR rd, GPR rs) noexcept { + EmitSHAInstruction(m_buffer, 0x10301013, rd, rs, x0); +} + +void Assembler::SHA256SUM0(GPR rd, GPR rs) noexcept { + EmitSHAInstruction(m_buffer, 0x10001013, rd, rs, x0); +} + +void Assembler::SHA256SUM1(GPR rd, GPR rs) noexcept { + EmitSHAInstruction(m_buffer, 0x10101013, rd, rs, x0); +} + +void Assembler::SHA512SIG0(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitSHAInstruction(m_buffer, 0x10601013, rd, rs, x0); +} + +void Assembler::SHA512SIG0H(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitSHAInstruction(m_buffer, 0x5C000033, rd, rs1, rs2); +} + +void Assembler::SHA512SIG0L(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitSHAInstruction(m_buffer, 0x54000033, rd, rs1, rs2); +} + +void Assembler::SHA512SIG1(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitSHAInstruction(m_buffer, 0x10701013, rd, rs, x0); +} + +void Assembler::SHA512SIG1H(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitSHAInstruction(m_buffer, 0x5E000033, rd, rs1, rs2); +} + +void Assembler::SHA512SIG1L(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitSHAInstruction(m_buffer, 0x56000033, rd, rs1, rs2); +} + +void Assembler::SHA512SUM0(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitSHAInstruction(m_buffer, 0x10401013, rd, rs, x0); +} + +void Assembler::SHA512SUM0R(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitSHAInstruction(m_buffer, 0x50000033, rd, rs1, rs2); +} + +void Assembler::SHA512SUM1(GPR rd, GPR rs) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitSHAInstruction(m_buffer, 0x10501013, rd, rs, x0); +} + +void Assembler::SHA512SUM1R(GPR rd, GPR rs1, GPR rs2) noexcept { + BISCUIT_ASSERT(IsRV32(m_features)); + EmitSHAInstruction(m_buffer, 0x52000033, rd, rs1, rs2); +} + +void Assembler::SM3P0(GPR rd, GPR rs) noexcept { + EmitSM3Instruction(m_buffer, 0x10801013, rd, rs); +} + +void Assembler::SM3P1(GPR rd, GPR rs) noexcept { + EmitSM3Instruction(m_buffer, 0x10901013, rd, rs); +} + +void Assembler::SM4ED(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept { + EmitSM4Instruction(m_buffer, 0x30000033, rd, rs1, rs2, bs); +} + +void Assembler::SM4KS(GPR rd, GPR rs1, GPR rs2, uint32_t bs) noexcept { + EmitSM4Instruction(m_buffer, 0x34000033, rd, rs1, rs2, bs); +} +} // namespace biscuit diff --git a/src/assembler_floating_point.cpp b/src/assembler_floating_point.cpp new file mode 100644 index 00000000..2c6d4901 --- /dev/null +++ b/src/assembler_floating_point.cpp @@ -0,0 +1,648 @@ +#include +#include + +#include +#include +#include +#include + +#include "assembler_util.hpp" + +// Various floating-point-based extension instructions. 
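+//
+// Note: for the instructions below that take an RMode, the rounding mode is
+// encoded in the funct3 slot of the instruction, which is why it is passed
+// where a funct3 value would otherwise go.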
+ +namespace biscuit { + +// RV32F Extension Instructions + +void Assembler::FADD_S(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0000000, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FCLASS_S(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1110000, f0, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FCVT_S_W(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101000, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_S_WU(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101000, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_W_S(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100000, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_WU_S(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100000, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FDIV_S(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0001100, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FEQ_S(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010000, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FLE_S(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010000, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FLT_S(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010000, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FLW(FPR rd, int32_t offset, GPR rs) noexcept { + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + EmitIType(m_buffer, static_cast(offset), rs, 0b010, rd, 0b0000111); +} +void Assembler::FMADD_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b00, rs2, rs1, rmode, rd, 0b1000011); +} +void Assembler::FMAX_S(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FMIN_S(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMSUB_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b00, rs2, rs1, rmode, rd, 0b1000111); +} +void Assembler::FMUL_S(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0001000, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FMV_W_X(FPR rd, GPR rs1) noexcept { + EmitRType(m_buffer, 0b1111000, f0, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMV_X_W(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1110000, f0, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FNMADD_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b00, rs2, rs1, rmode, rd, 0b1001111); +} +void Assembler::FNMSUB_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b00, rs2, rs1, rmode, rd, 0b1001011); +} +void Assembler::FSGNJ_S(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FSGNJN_S(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FSGNJX_S(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FSQRT_S(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0101100, f0, rs1, rmode, rd, 0b1010011); +} +void Assembler::FSUB_S(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0000100, rs2, rs1, rmode, rd, 
0b1010011); +} +void Assembler::FSW(FPR rs2, int32_t offset, GPR rs1) noexcept { + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + EmitSType(m_buffer, static_cast(offset), rs2, rs1, 0b010, 0b0100111); +} + +void Assembler::FABS_S(FPR rd, FPR rs) noexcept { + FSGNJX_S(rd, rs, rs); +} +void Assembler::FMV_S(FPR rd, FPR rs) noexcept { + FSGNJ_S(rd, rs, rs); +} +void Assembler::FNEG_S(FPR rd, FPR rs) noexcept { + FSGNJN_S(rd, rs, rs); +} + +// RV64F Extension Instructions + +void Assembler::FCVT_L_S(GPR rd, FPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1100000, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_LU_S(GPR rd, FPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1100000, f3, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_S_L(FPR rd, GPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1101000, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_S_LU(FPR rd, GPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1101000, f3, rs1, static_cast(rmode), rd, 0b1010011); +} + +// RV32D Extension Instructions + +void Assembler::FADD_D(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0000001, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FCLASS_D(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1110001, f0, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FCVT_D_W(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101001, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_D_WU(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101001, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_W_D(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100001, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_WU_D(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100001, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_D_S(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100001, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_S_D(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100000, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FDIV_D(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0001101, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FEQ_D(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010001, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FLE_D(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010001, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FLT_D(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010001, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FLD(FPR rd, int32_t offset, GPR rs) noexcept { + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + EmitIType(m_buffer, static_cast(offset), rs, 0b011, rd, 0b0000111); +} +void Assembler::FMADD_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b01, rs2, rs1, rmode, rd, 0b1000011); +} +void Assembler::FMAX_D(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010101, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FMIN_D(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010101, rs2, rs1, 0b000, rd, 0b1010011); +} +void 
Assembler::FMSUB_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b01, rs2, rs1, rmode, rd, 0b1000111); +} +void Assembler::FMUL_D(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0001001, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FNMADD_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b01, rs2, rs1, rmode, rd, 0b1001111); +} +void Assembler::FNMSUB_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b01, rs2, rs1, rmode, rd, 0b1001011); +} +void Assembler::FSGNJ_D(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010001, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FSGNJN_D(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010001, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FSGNJX_D(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010001, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FSQRT_D(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0101101, f0, rs1, rmode, rd, 0b1010011); +} +void Assembler::FSUB_D(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0000101, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FSD(FPR rs2, int32_t offset, GPR rs1) noexcept { + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + EmitSType(m_buffer, static_cast(offset), rs2, rs1, 0b011, 0b0100111); +} + +void Assembler::FABS_D(FPR rd, FPR rs) noexcept { + FSGNJX_D(rd, rs, rs); +} +void Assembler::FMV_D(FPR rd, FPR rs) noexcept { + FSGNJ_D(rd, rs, rs); +} +void Assembler::FNEG_D(FPR rd, FPR rs) noexcept { + FSGNJN_D(rd, rs, rs); +} + +// RV64D Extension Instructions + +void Assembler::FCVT_L_D(GPR rd, FPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1100001, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_LU_D(GPR rd, FPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1100001, f3, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_D_L(FPR rd, GPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1101001, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_D_LU(FPR rd, GPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1101001, f3, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FMV_D_X(FPR rd, GPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + EmitRType(m_buffer, 0b1111001, f0, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMV_X_D(GPR rd, FPR rs1) noexcept { + BISCUIT_ASSERT(IsRV64OrRV128(m_features)); + EmitRType(m_buffer, 0b1110001, f0, rs1, 0b000, rd, 0b1010011); +} + +// RV32Q Extension Instructions + +void Assembler::FADD_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0000011, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FCLASS_Q(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1110011, f0, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FCVT_Q_W(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101011, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_Q_WU(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101011, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_W_Q(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100011, f0, rs1, static_cast(rmode), rd, 
0b1010011); +} +void Assembler::FCVT_WU_Q(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100011, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_Q_D(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100011, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_D_Q(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100001, f3, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_Q_S(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100011, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_S_Q(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100000, f3, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FDIV_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0001111, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FEQ_Q(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010011, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FLE_Q(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010011, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FLT_Q(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010011, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FLQ(FPR rd, int32_t offset, GPR rs) noexcept { + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + EmitIType(m_buffer, static_cast(offset), rs, 0b100, rd, 0b0000111); +} +void Assembler::FMADD_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b11, rs2, rs1, rmode, rd, 0b1000011); +} +void Assembler::FMAX_Q(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010111, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FMIN_Q(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010111, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMSUB_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b11, rs2, rs1, rmode, rd, 0b1000111); +} +void Assembler::FMUL_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0001011, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FNMADD_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b11, rs2, rs1, rmode, rd, 0b1001111); +} +void Assembler::FNMSUB_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b11, rs2, rs1, rmode, rd, 0b1001011); +} +void Assembler::FSGNJ_Q(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010011, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FSGNJN_Q(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010011, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FSGNJX_Q(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010011, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FSQRT_Q(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0101111, f0, rs1, rmode, rd, 0b1010011); +} +void Assembler::FSUB_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0000111, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FSQ(FPR rs2, int32_t offset, GPR rs1) noexcept { + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + EmitSType(m_buffer, static_cast(offset), rs2, rs1, 0b100, 0b0100111); +} + +void Assembler::FABS_Q(FPR rd, FPR rs) noexcept { + FSGNJX_Q(rd, rs, rs); +} +void Assembler::FMV_Q(FPR rd, FPR rs) noexcept { + FSGNJ_Q(rd, rs, rs); +} +void Assembler::FNEG_Q(FPR rd, FPR rs) 
noexcept { + FSGNJN_Q(rd, rs, rs); +} + +// RV64Q Extension Instructions + +void Assembler::FCVT_L_Q(GPR rd, FPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1100011, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_LU_Q(GPR rd, FPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1100011, f3, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_Q_L(FPR rd, GPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1101011, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_Q_LU(FPR rd, GPR rs1, RMode rmode) noexcept { + BISCUIT_ASSERT(IsRV64(m_features)); + EmitRType(m_buffer, 0b1101011, f3, rs1, static_cast(rmode), rd, 0b1010011); +} + +// RV32Zfh Extension Instructions + +void Assembler::FADD_H(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0000010, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FCLASS_H(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1110010, f0, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FCVT_D_H(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100001, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_H_D(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100010, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_H_Q(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100010, f3, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_H_S(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100010, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_H_W(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101010, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_H_WU(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101010, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_Q_H(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100011, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_S_H(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100000, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_W_H(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100010, f0, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_WU_H(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100010, f1, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FDIV_H(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0001110, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FEQ_H(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010010, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FLE_H(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010010, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FLH(FPR rd, int32_t offset, GPR rs) noexcept { + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + EmitIType(m_buffer, static_cast(offset), rs, 0b001, rd, 0b0000111); +} +void Assembler::FLT_H(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010010, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FMADD_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b10, rs2, rs1, rmode, rd, 0b1000011); +} +void Assembler::FMAX_H(FPR rd, FPR rs1, FPR rs2) noexcept { + 
EmitRType(m_buffer, 0b0010110, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FMIN_H(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010110, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMSUB_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b10, rs2, rs1, rmode, rd, 0b1000111); +} +void Assembler::FMUL_H(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0001010, rs2, rs1, rmode, rd, 0b1010011); +} +void Assembler::FMV_H_X(FPR rd, GPR rs1) noexcept { + EmitRType(m_buffer, 0b1111010, f0, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMV_X_H(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1110010, f0, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FNMADD_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b10, rs2, rs1, rmode, rd, 0b1001111); +} +void Assembler::FNMSUB_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept { + EmitR4Type(m_buffer, rs3, 0b10, rs2, rs1, rmode, rd, 0b1001011); +} +void Assembler::FSGNJ_H(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010010, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FSGNJN_H(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010010, rs2, rs1, 0b001, rd, 0b1010011); +} +void Assembler::FSGNJX_H(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010010, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FSH(FPR rs2, int32_t offset, GPR rs1) noexcept { + BISCUIT_ASSERT(IsValidSigned12BitImm(offset)); + EmitSType(m_buffer, static_cast(offset), rs2, rs1, 0b001, 0b0100111); +} +void Assembler::FSQRT_H(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0101110, f0, rs1, rmode, rd, 0b1010011); +} +void Assembler::FSUB_H(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0000110, rs2, rs1, rmode, rd, 0b1010011); +} + +// RV64Zfh Extension Instructions + +void Assembler::FCVT_L_H(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100010, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_LU_H(GPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1100010, f3, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_H_L(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101010, f2, rs1, static_cast(rmode), rd, 0b1010011); +} +void Assembler::FCVT_H_LU(FPR rd, GPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b1101010, f3, rs1, static_cast(rmode), rd, 0b1010011); +} + +// Zfa Extension Instructions + +static void FLIImpl(CodeBuffer& buffer, uint32_t funct7, FPR rd, double value) noexcept { + static constexpr std::array fli_table{ + 0xBFF0000000000000ULL, // -1.0 + 0x0010000000000000ULL, // Minimum positive normal + 0x3EF0000000000000ULL, // 1.0 * 2^-16 + 0x3F00000000000000ULL, // 1.0 * 2^-15 + 0x3F70000000000000ULL, // 1.0 * 2^-8 + 0x3F80000000000000ULL, // 1.0 * 2^-7 + 0x3FB0000000000000ULL, // 1.0 * 2^-4 + 0x3FC0000000000000ULL, // 1.0 * 2^-3 + 0x3FD0000000000000ULL, // 0.25 + 0x3FD4000000000000ULL, // 0.3125 + 0x3FD8000000000000ULL, // 0.375 + 0x3FDC000000000000ULL, // 0.4375 + 0x3FE0000000000000ULL, // 0.5 + 0x3FE4000000000000ULL, // 0.625 + 0x3FE8000000000000ULL, // 0.75 + 0x3FEC000000000000ULL, // 0.875 + 0x3FF0000000000000ULL, // 1.0 + 0x3FF4000000000000ULL, // 1.25 + 0x3FF8000000000000ULL, // 1.5 + 0x3FFC000000000000ULL, // 1.75 + 0x4000000000000000ULL, // 2.0 + 0x4004000000000000ULL, // 2.5 + 0x4008000000000000ULL, // 3 + 0x4010000000000000ULL, // 4 + 
+ +void Assembler::FLI_D(FPR rd, double value) noexcept { + FLIImpl(m_buffer, 0b1111001, rd, value); +} +void Assembler::FLI_H(FPR rd, double value) noexcept { + FLIImpl(m_buffer, 0b1111010, rd, value); +} +void Assembler::FLI_S(FPR rd, double value) noexcept { + FLIImpl(m_buffer, 0b1111000, rd, value); +} + +void Assembler::FMINM_D(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010101, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FMINM_H(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010110, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FMINM_Q(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010111, rs2, rs1, 0b010, rd, 0b1010011); +} +void Assembler::FMINM_S(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b010, rd, 0b1010011); +} + +void Assembler::FMAXM_D(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010101, rs2, rs1, 0b011, rd, 0b1010011); +} +void Assembler::FMAXM_H(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010110, rs2, rs1, 0b011, rd, 0b1010011); +} +void Assembler::FMAXM_Q(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010111, rs2, rs1, 0b011, rd, 0b1010011); +} +void Assembler::FMAXM_S(FPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b011, rd, 0b1010011); +} + +void Assembler::FROUND_D(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100001, f4, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011); +} +void Assembler::FROUND_H(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100010, f4, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011); +} +void Assembler::FROUND_Q(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100011, f4, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011); +} +void Assembler::FROUND_S(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100000, f4, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011); +} + +void Assembler::FROUNDNX_D(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100001, f5, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011); +} +void Assembler::FROUNDNX_H(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100010, f5, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011); +} +void Assembler::FROUNDNX_Q(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100011, f5, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011); +} +void Assembler::FROUNDNX_S(FPR rd, FPR rs1, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100000, f5, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011); +} + +void Assembler::FCVTMOD_W_D(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1100001, f8, rs1, static_cast<uint32_t>(RMode::RTZ), rd, 0b1010011); +}
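+ +// For reference: Zfa defines FCVTMOD.W.D with truncation as its only rounding behavior, which is +// why FCVTMOD_W_D above hard-codes RMode::RTZ instead of taking an rmode parameter.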
+ +void Assembler::FMVH_X_D(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1110001, f1, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMVH_X_Q(GPR rd, FPR rs1) noexcept { + EmitRType(m_buffer, 0b1110011, f1, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMVP_D_X(FPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b1011001, rs2, rs1, 0b000, rd, 0b1010011); +} +void Assembler::FMVP_Q_X(FPR rd, GPR rs1, GPR rs2) noexcept { + EmitRType(m_buffer, 0b1011011, rs2, rs1, 0b000, rd, 0b1010011); +} + +void Assembler::FLEQ_D(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010001, rs2, rs1, 0b100, rd, 0b1010011); +} +void Assembler::FLTQ_D(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010001, rs2, rs1, 0b101, rd, 0b1010011); +} + +void Assembler::FLEQ_H(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010010, rs2, rs1, 0b100, rd, 0b1010011); +} +void Assembler::FLTQ_H(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010010, rs2, rs1, 0b101, rd, 0b1010011); +} + +void Assembler::FLEQ_Q(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010011, rs2, rs1, 0b100, rd, 0b1010011); +} +void Assembler::FLTQ_Q(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010011, rs2, rs1, 0b101, rd, 0b1010011); +} + +void Assembler::FLEQ_S(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010000, rs2, rs1, 0b100, rd, 0b1010011); +} +void Assembler::FLTQ_S(GPR rd, FPR rs1, FPR rs2) noexcept { + EmitRType(m_buffer, 0b1010000, rs2, rs1, 0b101, rd, 0b1010011); +} + +// Zfbfmin, Zvfbfmin, Zvfbfwma Extension Instructions + +void Assembler::FCVT_BF16_S(FPR rd, FPR rs, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100010, f8, rs, static_cast<uint32_t>(rmode), rd, 0b1010011); +} + +void Assembler::FCVT_S_BF16(FPR rd, FPR rs, RMode rmode) noexcept { + EmitRType(m_buffer, 0b0100000, f6, rs, static_cast<uint32_t>(rmode), rd, 0b1010011); +} + +} // namespace biscuit diff --git a/src/assembler_util.hpp b/src/assembler_util.hpp new file mode 100644 index 00000000..fdb98940 --- /dev/null +++ b/src/assembler_util.hpp @@ -0,0 +1,224 @@ +#pragma once + +#include <cstddef> +#include <cstdint> +#include <cstdlib> + +#include <biscuit/assembler.hpp> +#include <biscuit/isa.hpp> + +// Generic internal utility header for various helper functions related +// to encoding instructions. + +namespace biscuit { +// Determines if a value lies within the range of a 6-bit immediate. +[[nodiscard]] constexpr bool IsValidSigned6BitImm(ptrdiff_t value) { + return value >= -32 && value <= 31; +} + +// S-type and I-type immediates are 12 bits in size. +[[nodiscard]] constexpr bool IsValidSigned12BitImm(ptrdiff_t value) { + return value >= -2048 && value <= 2047; +} + +// B-type immediates only provide -4KiB to +4KiB range branches. +[[nodiscard]] constexpr bool IsValidBTypeImm(ptrdiff_t value) { + return value >= -4096 && value <= 4095; +} + +// J-type immediates only provide -1MiB to +1MiB range branches. +[[nodiscard]] constexpr bool IsValidJTypeImm(ptrdiff_t value) { + return value >= -0x80000 && value <= 0x7FFFF; +} + +// CB-type immediates only provide -256B to +256B range branches. +[[nodiscard]] constexpr bool IsValidCBTypeImm(ptrdiff_t value) { + return value >= -256 && value <= 255; +} + +// CJ-type immediates only provide -2KiB to +2KiB range branches. +[[nodiscard]] constexpr bool IsValidCJTypeImm(ptrdiff_t value) { + return IsValidSigned12BitImm(value); +}
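+ +// For reference: the branch-immediate checks above validate byte offsets; bit 0 is never encoded, +// and the emitters below mask it away (e.g. EmitBType applies imm &= 0x1FFE), so only even offsets +// reach the instruction encoding.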
+ +// Determines whether or not the register fits in 3-bit compressed encoding. +[[nodiscard]] constexpr bool IsValid3BitCompressedReg(Register reg) { + const auto index = reg.Index(); + return index >= 8 && index <= 15; +} + +// Determines whether or not the given shift amount is valid for a compressed shift instruction. +[[nodiscard]] constexpr bool IsValidCompressedShiftAmount(uint32_t shift) { + return shift > 0 && shift <= 64; +} + +// Turns a compressed register into its encoding. +[[nodiscard]] constexpr uint32_t CompressedRegTo3BitEncoding(Register reg) { + return reg.Index() - 8; +} + +// Transforms a regular value into an immediate encoded in a B-type instruction. +[[nodiscard]] constexpr uint32_t TransformToBTypeImm(uint32_t imm) { + // clang-format off + return ((imm & 0x07E0) << 20) | + ((imm & 0x1000) << 19) | + ((imm & 0x001E) << 7) | + ((imm & 0x0800) >> 4); + // clang-format on +} + +// Transforms a regular value into an immediate encoded in a J-type instruction. +[[nodiscard]] constexpr uint32_t TransformToJTypeImm(uint32_t imm) { + // clang-format off + return ((imm & 0x0FF000) >> 0) | + ((imm & 0x000800) << 9) | + ((imm & 0x0007FE) << 20) | + ((imm & 0x100000) << 11); + // clang-format on +} + +// Transforms a regular value into an immediate encoded in a CB-type instruction. +[[nodiscard]] constexpr uint32_t TransformToCBTypeImm(uint32_t imm) { + // clang-format off + return ((imm & 0x0C0) >> 1) | + ((imm & 0x006) << 2) | + ((imm & 0x020) >> 3) | + ((imm & 0x018) << 7) | + ((imm & 0x100) << 4); + // clang-format on +} + +// Transforms a regular value into an immediate encoded in a CJ-type instruction. +[[nodiscard]] constexpr uint32_t TransformToCJTypeImm(uint32_t imm) { + // clang-format off + return ((imm & 0x800) << 1) | + ((imm & 0x010) << 7) | + ((imm & 0x300) << 1) | + ((imm & 0x400) >> 2) | + ((imm & 0x040) << 1) | + ((imm & 0x080) >> 1) | + ((imm & 0x00E) << 4) | + ((imm & 0x020) >> 3); + // clang-format on +} + +// Emits a B type RISC-V instruction. These consist of: +// imm[12|10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode +inline void EmitBType(CodeBuffer& buffer, uint32_t imm, GPR rs2, GPR rs1, + uint32_t funct3, uint32_t opcode) { + imm &= 0x1FFE; + + buffer.Emit32(TransformToBTypeImm(imm) | (rs2.Index() << 20) | (rs1.Index() << 15) | + ((funct3 & 0b111) << 12) | (opcode & 0x7F)); +} + +// Emits an I type RISC-V instruction. These consist of: +// imm[11:0] | rs1 | funct3 | rd | opcode +inline void EmitIType(CodeBuffer& buffer, uint32_t imm, Register rs1, uint32_t funct3, + Register rd, uint32_t opcode) { + imm &= 0xFFF; + + buffer.Emit32((imm << 20) | (rs1.Index() << 15) | ((funct3 & 0b111) << 12) | + (rd.Index() << 7) | (opcode & 0x7F)); +} + +// Emits a J type RISC-V instruction. These consist of: +// imm[20|10:1|11|19:12] | rd | opcode +inline void EmitJType(CodeBuffer& buffer, uint32_t imm, GPR rd, uint32_t opcode) { + imm &= 0x1FFFFE; + + buffer.Emit32(TransformToJTypeImm(imm) | rd.Index() << 7 | (opcode & 0x7F)); +} + +// Emits an R type RISC-V instruction. These consist of: +// funct7 | rs2 | rs1 | funct3 | rd | opcode +inline void EmitRType(CodeBuffer& buffer, uint32_t funct7, Register rs2, Register rs1, + uint32_t funct3, Register rd, uint32_t opcode) { + // clang-format off + const auto value = ((funct7 & 0xFF) << 25) | + (rs2.Index() << 20) | + (rs1.Index() << 15) | + ((funct3 & 0b111) << 12) | + (rd.Index() << 7) | + (opcode & 0x7F); + // clang-format on + + buffer.Emit32(value); +}
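+ +// Usage sketch (illustrative, not part of this header): the branch emitters in assembler.cpp feed +// EmitBType directly; e.g. BEQ rs1, rs2, imm lowers to +// EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b000, 0b1100011), +// with funct3 selecting the comparison and opcode 0b1100011 marking the BRANCH group.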
+ +// Emits an R type RISC-V instruction. These consist of: +// funct7 | rs2 | rs1 | funct3 | rd | opcode +inline void EmitRType(CodeBuffer& buffer, uint32_t funct7, FPR rs2, FPR rs1, RMode funct3, + FPR rd, uint32_t opcode) { + EmitRType(buffer, funct7, rs2, rs1, static_cast<uint32_t>(funct3), rd, opcode); +} + +// Emits an R4 type RISC-V instruction. These consist of: +// rs3 | funct2 | rs2 | rs1 | funct3 | rd | opcode +inline void EmitR4Type(CodeBuffer& buffer, FPR rs3, uint32_t funct2, FPR rs2, FPR rs1, + RMode funct3, FPR rd, uint32_t opcode) { + const auto reg_bits = (rs3.Index() << 27) | (rs2.Index() << 20) | (rs1.Index() << 15) | (rd.Index() << 7); + const auto funct_bits = ((funct2 & 0b11) << 25) | (static_cast<uint32_t>(funct3) << 12); + buffer.Emit32(reg_bits | funct_bits | (opcode & 0x7F)); +} + +// Emits an S type RISC-V instruction. These consist of: +// imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode +inline void EmitSType(CodeBuffer& buffer, uint32_t imm, Register rs2, GPR rs1, + uint32_t funct3, uint32_t opcode) { + imm &= 0xFFF; + + // clang-format off + const auto new_imm = ((imm & 0x01F) << 7) | + ((imm & 0xFE0) << 20); + // clang-format on + + buffer.Emit32(new_imm | (rs2.Index() << 20) | (rs1.Index() << 15) | + ((funct3 & 0b111) << 12) | (opcode & 0x7F)); +} + +// Emits a U type RISC-V instruction. These consist of: +// imm[31:12] | rd | opcode +inline void EmitUType(CodeBuffer& buffer, uint32_t imm, GPR rd, uint32_t opcode) { + buffer.Emit32((imm & 0x000FFFFF) << 12 | rd.Index() << 7 | (opcode & 0x7F)); +} + +// Emits an atomic instruction. +inline void EmitAtomic(CodeBuffer& buffer, uint32_t funct5, Ordering ordering, GPR rs2, GPR rs1, + uint32_t funct3, GPR rd, uint32_t opcode) noexcept { + const auto funct7 = (funct5 << 2) | static_cast<uint32_t>(ordering); + EmitRType(buffer, funct7, rs2, rs1, funct3, rd, opcode); +} + +// Emits a fence instruction. +inline void EmitFENCE(CodeBuffer& buffer, uint32_t fm, FenceOrder pred, FenceOrder succ, + GPR rs, uint32_t funct3, GPR rd, uint32_t opcode) noexcept { + // clang-format off + buffer.Emit32(((fm & 0b1111) << 28) | + (static_cast<uint32_t>(pred) << 24) | + (static_cast<uint32_t>(succ) << 20) | + (rs.Index() << 15) | + ((funct3 & 0b111) << 12) | + (rd.Index() << 7) | + (opcode & 0x7F)); + // clang-format on +}
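+ +// Usage sketch (illustrative): the RVA emitters pack funct5 and the acquire/release ordering bits +// into funct7 via EmitAtomic; e.g. AMOADD.W rd, rs2, (rs1) corresponds to +// EmitAtomic(m_buffer, 0b00000, ordering, rs2, rs1, 0b010, rd, 0b0101111).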
+ +// Internal helpers for siloing away particular comparisons for behavior. +constexpr bool IsRV32(ArchFeature feature) { + return feature == ArchFeature::RV32; +} +constexpr bool IsRV64(ArchFeature feature) { + return feature == ArchFeature::RV64; +} +constexpr bool IsRV128(ArchFeature feature) { + return feature == ArchFeature::RV128; +} +constexpr bool IsRV32OrRV64(ArchFeature feature) { + return IsRV32(feature) || IsRV64(feature); +} +constexpr bool IsRV64OrRV128(ArchFeature feature) { + return IsRV64(feature) || IsRV128(feature); +} + +} // namespace biscuit diff --git a/src/assembler_vector.cpp b/src/assembler_vector.cpp new file mode 100644 index 00000000..8f12e541 --- /dev/null +++ b/src/assembler_vector.cpp @@ -0,0 +1,2146 @@ +#include <biscuit/assembler.hpp> +#include <biscuit/assert.hpp> + +namespace biscuit { +namespace { + +enum class AddressingMode : uint32_t { + // clang-format off + UnitStride = 0b00, + IndexedUnordered = 0b01, + Strided = 0b10, + IndexedOrdered = 0b11, + // clang-format on +}; + +enum class UnitStrideLoadAddressingMode : uint32_t { + // clang-format off + Load = 0b00000, + MaskLoad = 0b01011, + LoadFaultOnlyFirst = 0b10000, + // clang-format on +}; + +enum class UnitStrideStoreAddressingMode : uint32_t { + // clang-format off + Store = 0b00000, + MaskStore = 0b01011, + // clang-format on +}; + +enum class WidthEncoding : uint32_t { + // clang-format off + E8 = 0b000, + E16 = 0b101, + E32 = 0b110, + E64 = 0b111, + // clang-format on +}; + +void EmitVectorLoadImpl(CodeBuffer& buffer, uint32_t nf, bool mew, AddressingMode mop, + VecMask vm, uint32_t lumop, GPR rs, WidthEncoding width, Vec vd) noexcept { + BISCUIT_ASSERT(nf <= 8); + + // Fit to encoding space. Allows for being more explicit about the size in calling functions + // (e.g. using 8 for 8 elements instead of 7). + if (nf != 0) { + nf -= 1; + } + + // clang-format off + const auto value = (nf << 29) | + (static_cast<uint32_t>(mew) << 28) | + (static_cast<uint32_t>(mop) << 26) | + (static_cast<uint32_t>(vm) << 25) | + (lumop << 20) | + (rs.Index() << 15) | + (static_cast<uint32_t>(width) << 12) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | 0b111); +} + +void EmitVectorLoad(CodeBuffer& buffer, uint32_t nf, bool mew, AddressingMode mop, + VecMask vm, UnitStrideLoadAddressingMode lumop, GPR rs, + WidthEncoding width, Vec vd) noexcept { + EmitVectorLoadImpl(buffer, nf, mew, mop, vm, static_cast<uint32_t>(lumop), rs, width, vd); +} + +void EmitVectorLoad(CodeBuffer& buffer, uint32_t nf, bool mew, AddressingMode mop, + VecMask vm, GPR rs2, GPR rs1, WidthEncoding width, Vec vd) noexcept { + EmitVectorLoadImpl(buffer, nf, mew, mop, vm, rs2.Index(), rs1, width, vd); +} + +void EmitVectorLoad(CodeBuffer& buffer, uint32_t nf, bool mew, AddressingMode mop, + VecMask vm, Vec vs2, GPR rs1, WidthEncoding width, Vec vd) noexcept { + EmitVectorLoadImpl(buffer, nf, mew, mop, vm, vs2.Index(), rs1, width, vd); +} + +void EmitVectorLoadWholeReg(CodeBuffer& buffer, uint32_t nf, bool mew, GPR rs, + WidthEncoding width, Vec vd) noexcept { + // RISC-V V extension spec (as of 1.0RC) only allows these nf values. + BISCUIT_ASSERT(nf == 1 || nf == 2 || nf == 4 || nf == 8); + + EmitVectorLoadImpl(buffer, nf, mew, AddressingMode::UnitStride, + VecMask::No, 0b01000, rs, width, vd); +}
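+ +// For reference: callers of the load helpers above pass the architectural NFIELDS value directly +// (e.g. 2 for a two-field segment load); the helpers store NFIELDS - 1 in bits 31:29, matching the +// V spec's nf encoding, with 0 also accepted as shorthand for a single field.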
+ +void EmitVectorStoreImpl(CodeBuffer& buffer, uint32_t nf, bool mew, AddressingMode mop, + VecMask vm, uint32_t sumop, GPR rs, WidthEncoding width, Vec vd) noexcept { + BISCUIT_ASSERT(nf <= 8); + + // Fit to encoding space. Allows for being more explicit about the size in calling functions + // (e.g. using 8 for 8 elements instead of 7). + if (nf != 0) { + nf -= 1; + } + + // clang-format off + const auto value = (nf << 29) | + (static_cast<uint32_t>(mew) << 28) | + (static_cast<uint32_t>(mop) << 26) | + (static_cast<uint32_t>(vm) << 25) | + (sumop << 20) | + (rs.Index() << 15) | + (static_cast<uint32_t>(width) << 12) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | 0b100111); +} + +void EmitVectorStore(CodeBuffer& buffer, uint32_t nf, bool mew, AddressingMode mop, + VecMask vm, UnitStrideStoreAddressingMode sumop, GPR rs, + WidthEncoding width, Vec vs) noexcept { + EmitVectorStoreImpl(buffer, nf, mew, mop, vm, static_cast<uint32_t>(sumop), rs, width, vs); +} + +void EmitVectorStore(CodeBuffer& buffer, uint32_t nf, bool mew, AddressingMode mop, + VecMask vm, GPR rs2, GPR rs1, WidthEncoding width, Vec vs3) noexcept { + EmitVectorStoreImpl(buffer, nf, mew, mop, vm, rs2.Index(), rs1, width, vs3); +} + +void EmitVectorStore(CodeBuffer& buffer, uint32_t nf, bool mew, AddressingMode mop, + VecMask vm, Vec vs2, GPR rs1, WidthEncoding width, Vec vs3) noexcept { + EmitVectorStoreImpl(buffer, nf, mew, mop, vm, vs2.Index(), rs1, width, vs3); +} + +void EmitVectorStoreWholeReg(CodeBuffer& buffer, uint32_t nf, GPR rs, Vec vs) noexcept { + // RISC-V V extension spec (as of 1.0RC) only allows these nf values. + BISCUIT_ASSERT(nf == 1 || nf == 2 || nf == 4 || nf == 8); + + EmitVectorStoreImpl(buffer, nf, false, AddressingMode::UnitStride, VecMask::No, + 0b01000, rs, WidthEncoding::E8, vs); +} + +void EmitVectorOPIVIImpl(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, uint32_t imm5, Vec vd) noexcept { + // clang-format off + const auto value = (funct6 << 26) | + (static_cast<uint32_t>(vm) << 25) | + (vs2.Index() << 20) | + ((imm5 & 0b11111) << 15) | + (0b011U << 12) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | 0b1010111); +} + +void EmitVectorOPIVI(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, int32_t simm5, Vec vd) noexcept { + BISCUIT_ASSERT(simm5 >= -16 && simm5 <= 15); + EmitVectorOPIVIImpl(buffer, funct6, vm, vs2, static_cast<uint32_t>(simm5), vd); +} + +void EmitVectorOPIVUI(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, uint32_t uimm5, Vec vd) noexcept { + BISCUIT_ASSERT(uimm5 <= 31); + EmitVectorOPIVIImpl(buffer, funct6, vm, vs2, uimm5, vd); +} + +void EmitVectorOPIVV(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, Vec vs1, Vec vd) noexcept { + // clang-format off + const auto value = (funct6 << 26) | + (static_cast<uint32_t>(vm) << 25) | + (vs2.Index() << 20) | + (vs1.Index() << 15) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | 0b1010111); +} + +void EmitVectorOPIVX(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, GPR rs1, Vec vd) noexcept { + // clang-format off + const auto value = (funct6 << 26) | + (static_cast<uint32_t>(vm) << 25) | + (vs2.Index() << 20) | + (rs1.Index() << 15) | + (0b100U << 12) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | 0b1010111); +} + +void EmitVectorOPMVVImpl(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, Vec vs1, Vec vd, + uint32_t op) noexcept { + // clang-format off + const auto value = (funct6 << 26) | + (static_cast<uint32_t>(vm) << 25) | + (vs2.Index() << 20) | + (vs1.Index() << 15) | + (0b010U << 12) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | op); +} + +void EmitVectorOPMVV(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, Vec vs1, Vec vd) noexcept { + EmitVectorOPMVVImpl(buffer, funct6, vm, vs2, vs1, vd, 0b1010111); +}
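+ +// For reference: OPIVI immediates are five bits wide, so the signed helpers accept [-16, 15] and +// the unsigned helpers [0, 31]; e.g. VADD(v1, v2, 15) encodes, while VADD(v1, v2, 16) trips the +// simm5 assertion above.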
+ +void EmitVectorOPMVVP(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, + Vec vs1, Vec vd) noexcept { + EmitVectorOPMVVImpl(buffer, funct6, vm, vs2, vs1, vd, 0b1110111); +} + +void EmitVectorOPMVX(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, GPR rs1, Vec vd) noexcept { + // clang-format off + const auto value = (funct6 << 26) | + (static_cast<uint32_t>(vm) << 25) | + (vs2.Index() << 20) | + (rs1.Index() << 15) | + (0b110U << 12) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | 0b1010111); +} + +void EmitVectorOPFVV(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, Vec vs1, Vec vd) noexcept { + // clang-format off + const auto value = (funct6 << 26) | + (static_cast<uint32_t>(vm) << 25) | + (vs2.Index() << 20) | + (vs1.Index() << 15) | + (0b001U << 12) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | 0b1010111); +} + +void EmitVectorOPFVF(CodeBuffer& buffer, uint32_t funct6, VecMask vm, Vec vs2, FPR rs1, Vec vd) noexcept { + // clang-format off + const auto value = (funct6 << 26) | + (static_cast<uint32_t>(vm) << 25) | + (vs2.Index() << 20) | + (rs1.Index() << 15) | + (0b101U << 12) | + (vd.Index() << 7); + // clang-format on + + buffer.Emit32(value | 0b1010111); +} +} // Anonymous namespace + +// Vector Integer Arithmetic Instructions + +void Assembler::VAADD(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b001001, mask, vs2, vs1, vd); +} + +void Assembler::VAADD(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b001001, mask, vs2, rs1, vd); +} + +void Assembler::VAADDU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b001000, mask, vs2, vs1, vd); +} + +void Assembler::VAADDU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b001000, mask, vs2, rs1, vd); +} + +void Assembler::VADC(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPIVV(m_buffer, 0b010000, VecMask::Yes, vs2, vs1, vd); +} + +void Assembler::VADC(Vec vd, Vec vs2, GPR rs1) noexcept { + EmitVectorOPIVX(m_buffer, 0b010000, VecMask::Yes, vs2, rs1, vd); +} + +void Assembler::VADC(Vec vd, Vec vs2, int32_t simm) noexcept { + EmitVectorOPIVI(m_buffer, 0b010000, VecMask::Yes, vs2, simm, vd); +} + +void Assembler::VADD(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b000000, mask, vs2, vs1, vd); +} + +void Assembler::VADD(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b000000, mask, vs2, rs1, vd); +} + +void Assembler::VADD(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b000000, mask, vs2, simm, vd); +} + +void Assembler::VAND(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b001001, mask, vs2, vs1, vd); +} + +void Assembler::VAND(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b001001, mask, vs2, rs1, vd); +} + +void Assembler::VAND(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b001001, mask, vs2, simm, vd); +} + +void Assembler::VASUB(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b001011, mask, vs2, vs1, vd); +} + +void Assembler::VASUB(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b001011, mask, vs2, rs1, vd); +} + +void Assembler::VASUBU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b001010, mask, vs2, vs1, vd); +} + +void Assembler::VASUBU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b001010, mask, vs2, rs1, vd); +}
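+ +// Usage sketch (illustrative): most arithmetic entry points take a trailing VecMask, so a +// v0-masked add is written as VADD(v4, v8, v12, VecMask::Yes); VADC above (and VSBC later in this +// file) are inherently masked and hard-code VecMask::Yes in their emission.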
+ +void Assembler::VCOMPRESS(Vec vd, Vec vs2, Vec vs1) noexcept { + // Note: Destination register may not overlap any of the source registers, + // as per the RVV spec (as of 1.0RC; see section 16.5) + EmitVectorOPMVV(m_buffer, 0b010111, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VDIV(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b100001, mask, vs2, vs1, vd); +} + +void Assembler::VDIV(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b100001, mask, vs2, rs1, vd); +} + +void Assembler::VDIVU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b100000, mask, vs2, vs1, vd); +} + +void Assembler::VDIVU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b100000, mask, vs2, rs1, vd); +} + +void Assembler::VFIRST(GPR rd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010000, mask, vs, v17, Vec{rd.Index()}); +} + +void Assembler::VID(Vec vd, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010100, mask, v0, v17, vd); +} + +void Assembler::VIOTA(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010100, mask, vs, v16, vd); +} + +void Assembler::VMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b101101, mask, vs2, vs1, vd); +} + +void Assembler::VMACC(Vec vd, GPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b101101, mask, vs2, rs1, vd); +} + +void Assembler::VMADC(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b010001, mask, vs2, vs1, vd); +} + +void Assembler::VMADC(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b010001, mask, vs2, rs1, vd); +} + +void Assembler::VMADC(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b010001, mask, vs2, simm, vd); +} + +void Assembler::VMADD(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b101001, mask, vs2, vs1, vd); +} + +void Assembler::VMADD(Vec vd, GPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b101001, mask, vs2, rs1, vd); +} + +void Assembler::VMAND(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPMVV(m_buffer, 0b011001, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VMANDNOT(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPMVV(m_buffer, 0b011000, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VMNAND(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPMVV(m_buffer, 0b011101, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VMNOR(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPMVV(m_buffer, 0b011110, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VMOR(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPMVV(m_buffer, 0b011010, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VMORNOT(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPMVV(m_buffer, 0b011100, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VMXNOR(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPMVV(m_buffer, 0b011111, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VMXOR(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPMVV(m_buffer, 0b011011, VecMask::No, vs2, vs1, vd); +} + +void Assembler::VMAX(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b000111, mask, vs2, vs1, vd); +} + +void Assembler::VMAX(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b000111, mask, vs2, rs1, vd); +} + +void Assembler::VMAXU(Vec vd, Vec vs2, Vec vs1, 
VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b000110, mask, vs2, vs1, vd); +} + +void Assembler::VMAXU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b000110, mask, vs2, rs1, vd); +} + +void Assembler::VMERGE(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPIVV(m_buffer, 0b010111, VecMask::Yes, vs2, vs1, vd); +} + +void Assembler::VMERGE(Vec vd, Vec vs2, GPR rs1) noexcept { + EmitVectorOPIVX(m_buffer, 0b010111, VecMask::Yes, vs2, rs1, vd); +} + +void Assembler::VMERGE(Vec vd, Vec vs2, int32_t simm) noexcept { + EmitVectorOPIVI(m_buffer, 0b010111, VecMask::Yes, vs2, simm, vd); +} + +void Assembler::VMIN(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b000101, mask, vs2, vs1, vd); +} + +void Assembler::VMIN(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b000101, mask, vs2, rs1, vd); +} + +void Assembler::VMINU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b000100, mask, vs2, vs1, vd); +} + +void Assembler::VMINU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b000100, mask, vs2, rs1, vd); +} + +void Assembler::VMSBC(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b010011, mask, vs2, vs1, vd); +} + +void Assembler::VMSBC(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b010011, mask, vs2, rs1, vd); +} + +void Assembler::VMSBF(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010100, mask, vs, v1, vd); +} + +void Assembler::VMSIF(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010100, mask, vs, v3, vd); +} + +void Assembler::VMSOF(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010100, mask, vs, v2, vd); +} + +void Assembler::VMSEQ(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b011000, mask, vs2, vs1, vd); +} + +void Assembler::VMSEQ(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b011000, mask, vs2, rs1, vd); +} + +void Assembler::VMSEQ(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b011000, mask, vs2, simm, vd); +} + +void Assembler::VMSGT(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b011111, mask, vs2, rs1, vd); +} + +void Assembler::VMSGT(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b011111, mask, vs2, simm, vd); +} + +void Assembler::VMSGTU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b011110, mask, vs2, rs1, vd); +} + +void Assembler::VMSGTU(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b011110, mask, vs2, simm, vd); +} + +void Assembler::VMSLE(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b011101, mask, vs2, vs1, vd); +} + +void Assembler::VMSLE(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b011101, mask, vs2, rs1, vd); +} + +void Assembler::VMSLE(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b011101, mask, vs2, simm, vd); +} + +void Assembler::VMSLEU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b011100, mask, vs2, vs1, vd); +} + +void Assembler::VMSLEU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b011100, mask, vs2, rs1, vd); +} + +void Assembler::VMSLEU(Vec vd, 
Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b011100, mask, vs2, simm, vd); +} + +void Assembler::VMSLT(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b011011, mask, vs2, vs1, vd); +} + +void Assembler::VMSLT(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b011011, mask, vs2, rs1, vd); +} + +void Assembler::VMSLTU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b011010, mask, vs2, vs1, vd); +} + +void Assembler::VMSLTU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b011010, mask, vs2, rs1, vd); +} + +void Assembler::VMSNE(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b011001, mask, vs2, vs1, vd); +} + +void Assembler::VMSNE(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b011001, mask, vs2, rs1, vd); +} + +void Assembler::VMSNE(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b011001, mask, vs2, simm, vd); +} + +void Assembler::VMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b100101, mask, vs2, vs1, vd); +} + +void Assembler::VMUL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b100101, mask, vs2, rs1, vd); +} + +void Assembler::VMULH(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b100111, mask, vs2, vs1, vd); +} + +void Assembler::VMULH(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b100111, mask, vs2, rs1, vd); +} + +void Assembler::VMULHSU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b100110, mask, vs2, vs1, vd); +} + +void Assembler::VMULHSU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b100110, mask, vs2, rs1, vd); +} + +void Assembler::VMULHU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b100100, mask, vs2, vs1, vd); +} + +void Assembler::VMULHU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b100100, mask, vs2, rs1, vd); +} + +void Assembler::VMV(Vec vd, Vec vs1) noexcept { + EmitVectorOPIVV(m_buffer, 0b010111, VecMask::No, v0, vs1, vd); +} + +void Assembler::VMV(Vec vd, GPR rs1) noexcept { + EmitVectorOPIVX(m_buffer, 0b010111, VecMask::No, v0, rs1, vd); +} + +void Assembler::VMV(Vec vd, int32_t simm) noexcept { + EmitVectorOPIVI(m_buffer, 0b010111, VecMask::No, v0, simm, vd); +} + +void Assembler::VMV1R(Vec vd, Vec vs) noexcept { + EmitVectorOPIVI(m_buffer, 0b100111, VecMask::No, vs, 0b00000, vd); +} + +void Assembler::VMV2R(Vec vd, Vec vs) noexcept { + // Registers must be aligned to the register group size, per the + // RVV spec (as of 1.0RC) + BISCUIT_ASSERT(vd.Index() % 2 == 0); + BISCUIT_ASSERT(vs.Index() % 2 == 0); + + EmitVectorOPIVI(m_buffer, 0b100111, VecMask::No, vs, 0b00001, vd); +} + +void Assembler::VMV4R(Vec vd, Vec vs) noexcept { + // Registers must be aligned to the register group size, per the + // RVV spec (as of 1.0RC) + BISCUIT_ASSERT(vd.Index() % 4 == 0); + BISCUIT_ASSERT(vs.Index() % 4 == 0); + + EmitVectorOPIVI(m_buffer, 0b100111, VecMask::No, vs, 0b00011, vd); +} + +void Assembler::VMV8R(Vec vd, Vec vs) noexcept { + // Registers must be aligned to the register group size, per the + // RVV spec (as of 1.0RC) + BISCUIT_ASSERT(vd.Index() % 8 == 0); + BISCUIT_ASSERT(vs.Index() % 8 == 0); + + EmitVectorOPIVI(m_buffer, 0b100111, 
VecMask::No, vs, 0b00111, vd); +} + +void Assembler::VMV_SX(Vec vd, GPR rs) noexcept { + EmitVectorOPMVX(m_buffer, 0b010000, VecMask::No, v0, rs, vd); +} + +void Assembler::VMV_XS(GPR rd, Vec vs) noexcept { + EmitVectorOPMVV(m_buffer, 0b010000, VecMask::No, vs, v0, Vec{rd.Index()}); +} + +void Assembler::VNCLIP(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b101111, mask, vs2, vs1, vd); +} + +void Assembler::VNCLIP(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b101111, mask, vs2, rs1, vd); +} + +void Assembler::VNCLIP(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b101111, mask, vs2, uimm, vd); +} + +void Assembler::VNCLIPU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b101110, mask, vs2, vs1, vd); +} + +void Assembler::VNCLIPU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b101110, mask, vs2, rs1, vd); +} + +void Assembler::VNCLIPU(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b101110, mask, vs2, uimm, vd); +} + +void Assembler::VNMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b101111, mask, vs2, vs1, vd); +} + +void Assembler::VNMSAC(Vec vd, GPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b101111, mask, vs2, rs1, vd); +} + +void Assembler::VNMSUB(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b101011, mask, vs2, vs1, vd); +} + +void Assembler::VNMSUB(Vec vd, GPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b101011, mask, vs2, rs1, vd); +} + +void Assembler::VNSRA(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b101101, mask, vs2, vs1, vd); +} + +void Assembler::VNSRA(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b101101, mask, vs2, rs1, vd); +} + +void Assembler::VNSRA(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b101101, mask, vs2, uimm, vd); +} + +void Assembler::VNSRL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b101100, mask, vs2, vs1, vd); +} + +void Assembler::VNSRL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b101100, mask, vs2, rs1, vd); +} + +void Assembler::VNSRL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b101100, mask, vs2, uimm, vd); +} + +void Assembler::VOR(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b001010, mask, vs2, vs1, vd); +} + +void Assembler::VOR(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b001010, mask, vs2, rs1, vd); +} + +void Assembler::VOR(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b001010, mask, vs2, simm, vd); +} + +void Assembler::VPOPC(GPR rd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010000, mask, vs, v16, Vec{rd.Index()}); +} + +void Assembler::VREDAND(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b000001, mask, vs2, vs1, vd); +} + +void Assembler::VREDMAX(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b000111, mask, vs2, vs1, vd); +} + +void Assembler::VREDMAXU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b000110, mask, vs2, vs1, vd); +} + +void Assembler::VREDMIN(Vec vd, Vec 
vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b000101, mask, vs2, vs1, vd); +} + +void Assembler::VREDMINU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b000100, mask, vs2, vs1, vd); +} + +void Assembler::VREDOR(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b000010, mask, vs2, vs1, vd); +} + +void Assembler::VREDSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b000000, mask, vs2, vs1, vd); +} + +void Assembler::VREDXOR(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b000011, mask, vs2, vs1, vd); +} + +void Assembler::VREM(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b100011, mask, vs2, vs1, vd); +} + +void Assembler::VREM(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b100011, mask, vs2, rs1, vd); +} + +void Assembler::VREMU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b100010, mask, vs2, vs1, vd); +} + +void Assembler::VREMU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b100010, mask, vs2, rs1, vd); +} + +void Assembler::VRGATHER(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b001100, mask, vs2, vs1, vd); +} + +void Assembler::VRGATHER(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b001100, mask, vs2, rs1, vd); +} + +void Assembler::VRGATHER(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b001100, mask, vs2, uimm, vd); +} + +void Assembler::VRGATHEREI16(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b001110, mask, vs2, vs1, vd); +} + +void Assembler::VRSUB(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b000011, mask, vs2, rs1, vd); +} + +void Assembler::VRSUB(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b000011, mask, vs2, simm, vd); +} + +void Assembler::VSADD(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b100001, mask, vs2, vs1, vd); +} + +void Assembler::VSADD(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b100001, mask, vs2, rs1, vd); +} + +void Assembler::VSADD(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b100001, mask, vs2, simm, vd); +} + +void Assembler::VSADDU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b100000, mask, vs2, vs1, vd); +} + +void Assembler::VSADDU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b100000, mask, vs2, rs1, vd); +} + +void Assembler::VSADDU(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b100000, mask, vs2, simm, vd); +} + +void Assembler::VSBC(Vec vd, Vec vs2, Vec vs1) noexcept { + EmitVectorOPIVV(m_buffer, 0b010010, VecMask::Yes, vs2, vs1, vd); +} + +void Assembler::VSBC(Vec vd, Vec vs2, GPR rs1) noexcept { + EmitVectorOPIVX(m_buffer, 0b010010, VecMask::Yes, vs2, rs1, vd); +} + +void Assembler::VSEXTVF2(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010010, mask, vs, v7, vd); +} + +void Assembler::VSEXTVF4(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010010, mask, vs, v5, vd); +} + +void Assembler::VSEXTVF8(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010010, mask, vs, v3, 
vd); +} + +void Assembler::VSLIDE1DOWN(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b001111, mask, vs2, rs1, vd); +} + +void Assembler::VSLIDEDOWN(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b001111, mask, vs2, rs1, vd); +} + +void Assembler::VSLIDEDOWN(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b001111, mask, vs2, uimm, vd); +} + +void Assembler::VSLIDE1UP(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b001110, mask, vs2, rs1, vd); +} + +void Assembler::VSLIDEUP(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b001110, mask, vs2, rs1, vd); +} + +void Assembler::VSLIDEUP(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b001110, mask, vs2, uimm, vd); +} + +void Assembler::VSLL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b100101, mask, vs2, vs1, vd); +} + +void Assembler::VSLL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b100101, mask, vs2, rs1, vd); +} + +void Assembler::VSLL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b100101, mask, vs2, uimm, vd); +} + +void Assembler::VSMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b100111, mask, vs2, vs1, vd); +} + +void Assembler::VSMUL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b100111, mask, vs2, rs1, vd); +} + +void Assembler::VSRA(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b101001, mask, vs2, vs1, vd); +} + +void Assembler::VSRA(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b101001, mask, vs2, rs1, vd); +} + +void Assembler::VSRA(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b101001, mask, vs2, uimm, vd); +} + +void Assembler::VSRL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b101000, mask, vs2, vs1, vd); +} + +void Assembler::VSRL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b101000, mask, vs2, rs1, vd); +} + +void Assembler::VSRL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b101000, mask, vs2, uimm, vd); +} + +void Assembler::VSSRA(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b101011, mask, vs2, vs1, vd); +} + +void Assembler::VSSRA(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b101011, mask, vs2, rs1, vd); +} + +void Assembler::VSSRA(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b101011, mask, vs2, uimm, vd); +} + +void Assembler::VSSRL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b101010, mask, vs2, vs1, vd); +} + +void Assembler::VSSRL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b101010, mask, vs2, rs1, vd); +} + +void Assembler::VSSRL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept { + EmitVectorOPIVUI(m_buffer, 0b101010, mask, vs2, uimm, vd); +} + +void Assembler::VSSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b100011, mask, vs2, vs1, vd); +} + +void Assembler::VSSUB(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b100011, mask, vs2, rs1, vd); +} + +void 
Assembler::VSSUBU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b100010, mask, vs2, vs1, vd); +} + +void Assembler::VSSUBU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b100010, mask, vs2, rs1, vd); +} + +void Assembler::VSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b000010, mask, vs2, vs1, vd); +} + +void Assembler::VSUB(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b000010, mask, vs2, rs1, vd); +} + +void Assembler::VWADD(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b110001, mask, vs2, vs1, vd); +} + +void Assembler::VWADD(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b110001, mask, vs2, rs1, vd); +} + +void Assembler::VWADDW(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b110101, mask, vs2, vs1, vd); +} + +void Assembler::VWADDW(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b110101, mask, vs2, rs1, vd); +} + +void Assembler::VWADDU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b110000, mask, vs2, vs1, vd); +} + +void Assembler::VWADDU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b110000, mask, vs2, rs1, vd); +} + +void Assembler::VWADDUW(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b110100, mask, vs2, vs1, vd); +} + +void Assembler::VWADDUW(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b110100, mask, vs2, rs1, vd); +} + +void Assembler::VWMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b111101, mask, vs2, vs1, vd); +} + +void Assembler::VWMACC(Vec vd, GPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b111101, mask, vs2, rs1, vd); +} + +void Assembler::VWMACCSU(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b111111, mask, vs2, vs1, vd); +} + +void Assembler::VWMACCSU(Vec vd, GPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b111111, mask, vs2, rs1, vd); +} + +void Assembler::VWMACCU(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b111100, mask, vs2, vs1, vd); +} + +void Assembler::VWMACCU(Vec vd, GPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b111100, mask, vs2, rs1, vd); +} + +void Assembler::VWMACCUS(Vec vd, GPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b111110, mask, vs2, rs1, vd); +} + +void Assembler::VWMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b111011, mask, vs2, vs1, vd); +} + +void Assembler::VWMUL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b111011, mask, vs2, rs1, vd); +} + +void Assembler::VWMULSU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b111010, mask, vs2, vs1, vd); +} + +void Assembler::VWMULSU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b111010, mask, vs2, rs1, vd); +} + +void Assembler::VWMULU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b111000, mask, vs2, vs1, vd); +} + +void Assembler::VWMULU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b111000, mask, vs2, rs1, vd); +} + +void Assembler::VWREDSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + 
EmitVectorOPIVV(m_buffer, 0b110001, mask, vs2, vs1, vd); +} + +void Assembler::VWREDSUMU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b110000, mask, vs2, vs1, vd); +} + +void Assembler::VWSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b110011, mask, vs2, vs1, vd); +} + +void Assembler::VWSUB(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b110011, mask, vs2, rs1, vd); +} + +void Assembler::VWSUBW(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b110111, mask, vs2, vs1, vd); +} + +void Assembler::VWSUBW(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b110111, mask, vs2, rs1, vd); +} + +void Assembler::VWSUBU(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b110010, mask, vs2, vs1, vd); +} + +void Assembler::VWSUBU(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b110010, mask, vs2, rs1, vd); +} + +void Assembler::VWSUBUW(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b110110, mask, vs2, vs1, vd); +} + +void Assembler::VWSUBUW(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPMVX(m_buffer, 0b110110, mask, vs2, rs1, vd); +} + +void Assembler::VXOR(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPIVV(m_buffer, 0b001011, mask, vs2, vs1, vd); +} + +void Assembler::VXOR(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept { + EmitVectorOPIVX(m_buffer, 0b001011, mask, vs2, rs1, vd); +} + +void Assembler::VXOR(Vec vd, Vec vs2, int32_t simm, VecMask mask) noexcept { + EmitVectorOPIVI(m_buffer, 0b001011, mask, vs2, simm, vd); +} + +void Assembler::VZEXTVF2(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010010, mask, vs, v6, vd); +} + +void Assembler::VZEXTVF4(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010010, mask, vs, v4, vd); +} + +void Assembler::VZEXTVF8(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPMVV(m_buffer, 0b010010, mask, vs, v2, vd); +} + +// Vector Floating-Point Instructions + +void Assembler::VFADD(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b000000, mask, vs2, vs1, vd); +} + +void Assembler::VFADD(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b000000, mask, vs2, rs1, vd); +} + +void Assembler::VFCLASS(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010011, mask, vs, v16, vd); +} + +void Assembler::VFCVT_F_X(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v3, vd); +} + +void Assembler::VFCVT_F_XU(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v2, vd); +} + +void Assembler::VFCVT_RTZ_X_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v7, vd); +} + +void Assembler::VFCVT_RTZ_XU_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v6, vd); +} + +void Assembler::VFCVT_X_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v1, vd); +} + +void Assembler::VFCVT_XU_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v0, vd); +} + +void Assembler::VFNCVT_F_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v20, vd); +} + +void Assembler::VFNCVT_F_X(Vec vd, Vec vs, VecMask mask) 
noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v19, vd); +} + +void Assembler::VFNCVT_F_XU(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v18, vd); +} + +void Assembler::VFNCVT_ROD_F_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v21, vd); +} + +void Assembler::VFNCVT_RTZ_X_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v23, vd); +} + +void Assembler::VFNCVT_RTZ_XU_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v22, vd); +} + +void Assembler::VFNCVT_X_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v17, vd); +} + +void Assembler::VFNCVT_XU_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v16, vd); +} + +void Assembler::VFWCVT_F_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v12, vd); +} + +void Assembler::VFWCVT_F_X(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v11, vd); +} + +void Assembler::VFWCVT_F_XU(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v10, vd); +} + +void Assembler::VFWCVT_RTZ_X_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v15, vd); +} + +void Assembler::VFWCVT_RTZ_XU_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v14, vd); +} + +void Assembler::VFWCVT_X_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v9, vd); +} + +void Assembler::VFWCVT_XU_F(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v8, vd); +} + +void Assembler::VFDIV(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b100000, mask, vs2, vs1, vd); +} + +void Assembler::VFDIV(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b100000, mask, vs2, rs1, vd); +} + +void Assembler::VFRDIV(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b100001, mask, vs2, rs1, vd); +} + +void Assembler::VFREDMAX(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b000111, mask, vs2, vs1, vd); +} + +void Assembler::VFREDMIN(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b000101, mask, vs2, vs1, vd); +} + +void Assembler::VFREDSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b000001, mask, vs2, vs1, vd); +} + +void Assembler::VFREDOSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b000011, mask, vs2, vs1, vd); +} + +void Assembler::VFMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b101100, mask, vs2, vs1, vd); +} + +void Assembler::VFMACC(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b101100, mask, vs2, rs1, vd); +} + +void Assembler::VFMADD(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b101000, mask, vs2, vs1, vd); +} + +void Assembler::VFMADD(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b101000, mask, vs2, rs1, vd); +} + +void Assembler::VFMAX(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b000110, mask, vs2, vs1, vd); +} + +void Assembler::VFMAX(Vec vd, Vec vs2, FPR rs1, VecMask mask) 
noexcept { + EmitVectorOPFVF(m_buffer, 0b000110, mask, vs2, rs1, vd); +} + +void Assembler::VFMERGE(Vec vd, Vec vs2, FPR rs1) noexcept { + EmitVectorOPFVF(m_buffer, 0b010111, VecMask::Yes, vs2, rs1, vd); +} + +void Assembler::VFMIN(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b000100, mask, vs2, vs1, vd); +} + +void Assembler::VFMIN(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b000100, mask, vs2, rs1, vd); +} + +void Assembler::VFMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b101110, mask, vs2, vs1, vd); +} + +void Assembler::VFMSAC(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b101110, mask, vs2, rs1, vd); +} + +void Assembler::VFMSUB(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b101010, mask, vs2, vs1, vd); +} + +void Assembler::VFMSUB(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b101010, mask, vs2, rs1, vd); +} + +void Assembler::VFMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b100100, mask, vs2, vs1, vd); +} + +void Assembler::VFMUL(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b100100, mask, vs2, rs1, vd); +} + +void Assembler::VFMV(Vec vd, FPR rs) noexcept { + EmitVectorOPFVF(m_buffer, 0b010111, VecMask::No, v0, rs, vd); +} + +void Assembler::VFMV_FS(FPR rd, Vec vs) noexcept { + EmitVectorOPFVV(m_buffer, 0b010000, VecMask::No, vs, v0, Vec{rd.Index()}); +} + +void Assembler::VFMV_SF(Vec vd, FPR rs) noexcept { + EmitVectorOPFVF(m_buffer, 0b010000, VecMask::No, v0, rs, vd); +} + +void Assembler::VFNMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b101101, mask, vs2, vs1, vd); +} + +void Assembler::VFNMACC(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b101101, mask, vs2, rs1, vd); +} + +void Assembler::VFNMADD(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b101001, mask, vs2, vs1, vd); +} + +void Assembler::VFNMADD(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b101001, mask, vs2, rs1, vd); +} + +void Assembler::VFNMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b101111, mask, vs2, vs1, vd); +} + +void Assembler::VFNMSAC(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b101111, mask, vs2, rs1, vd); +} + +void Assembler::VFNMSUB(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b101011, mask, vs2, vs1, vd); +} + +void Assembler::VFNMSUB(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b101011, mask, vs2, rs1, vd); +} + +void Assembler::VFREC7(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010011, mask, vs, v5, vd); +} + +void Assembler::VFSGNJ(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b001000, mask, vs2, vs1, vd); +} + +void Assembler::VFSGNJ(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b001000, mask, vs2, rs1, vd); +} + +void Assembler::VFSGNJN(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b001001, mask, vs2, vs1, vd); +} + +void Assembler::VFSGNJN(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b001001, mask, vs2, rs1, vd); +} + +void Assembler::VFSGNJX(Vec vd, Vec vs2, Vec vs1, VecMask 
mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b001010, mask, vs2, vs1, vd); +} + +void Assembler::VFSGNJX(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b001010, mask, vs2, rs1, vd); +} + +void Assembler::VFSQRT(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010011, mask, vs, v0, vd); +} + +void Assembler::VFRSQRT7(Vec vd, Vec vs, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b010011, mask, vs, v4, vd); +} + +void Assembler::VFSLIDE1DOWN(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b001111, mask, vs2, rs1, vd); +} + +void Assembler::VFSLIDE1UP(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b001110, mask, vs2, rs1, vd); +} + +void Assembler::VFSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b000010, mask, vs2, vs1, vd); +} + +void Assembler::VFSUB(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b000010, mask, vs2, rs1, vd); +} + +void Assembler::VFRSUB(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b100111, mask, vs2, rs1, vd); +} + +void Assembler::VFWADD(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b110000, mask, vs2, vs1, vd); +} + +void Assembler::VFWADD(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b110000, mask, vs2, rs1, vd); +} + +void Assembler::VFWADDW(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b110100, mask, vs2, vs1, vd); +} + +void Assembler::VFWADDW(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b110100, mask, vs2, rs1, vd); +} + +void Assembler::VFWMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b111100, mask, vs2, vs1, vd); +} + +void Assembler::VFWMACC(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b111100, mask, vs2, rs1, vd); +} + +void Assembler::VFWMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b111000, mask, vs2, vs1, vd); +} + +void Assembler::VFWMUL(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b111000, mask, vs2, rs1, vd); +} + +void Assembler::VFWNMACC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b111101, mask, vs2, vs1, vd); +} + +void Assembler::VFWNMACC(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b111101, mask, vs2, rs1, vd); +} + +void Assembler::VFWNMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b111111, mask, vs2, vs1, vd); +} + +void Assembler::VFWNMSAC(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b111111, mask, vs2, rs1, vd); +} + +void Assembler::VFWREDSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b110001, mask, vs2, vs1, vd); +} + +void Assembler::VFWREDOSUM(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b110011, mask, vs2, vs1, vd); +} + +void Assembler::VFWMSAC(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b111110, mask, vs2, vs1, vd); +} + +void Assembler::VFWMSAC(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b111110, mask, vs2, rs1, vd); +} + +void Assembler::VFWSUB(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b110010, mask, vs2, vs1, 
vd); +} + +void Assembler::VFWSUB(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b110010, mask, vs2, rs1, vd); +} + +void Assembler::VFWSUBW(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b110110, mask, vs2, vs1, vd); +} + +void Assembler::VFWSUBW(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b110110, mask, vs2, rs1, vd); +} + +void Assembler::VMFEQ(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b011000, mask, vs2, vs1, vd); +} + +void Assembler::VMFEQ(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b011000, mask, vs2, rs1, vd); +} + +void Assembler::VMFGE(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b011111, mask, vs2, rs1, vd); +} + +void Assembler::VMFGT(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b011101, mask, vs2, rs1, vd); +} + +void Assembler::VMFLE(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b011001, mask, vs2, vs1, vd); +} + +void Assembler::VMFLE(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b011001, mask, vs2, rs1, vd); +} + +void Assembler::VMFLT(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b011011, mask, vs2, vs1, vd); +} + +void Assembler::VMFLT(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b011011, mask, vs2, rs1, vd); +} + +void Assembler::VMFNE(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept { + EmitVectorOPFVV(m_buffer, 0b011100, mask, vs2, vs1, vd); +} + +void Assembler::VMFNE(Vec vd, Vec vs2, FPR rs1, VecMask mask) noexcept { + EmitVectorOPFVF(m_buffer, 0b011100, mask, vs2, rs1, vd); +} + +// Vector Load/Store Instructions + +void Assembler::VLE8(Vec vd, GPR rs, VecMask mask) noexcept { + VLSEGE8(1, vd, rs, mask); +} + +void Assembler::VLE16(Vec vd, GPR rs, VecMask mask) noexcept { + VLSEGE16(1, vd, rs, mask); +} + +void Assembler::VLE32(Vec vd, GPR rs, VecMask mask) noexcept { + VLSEGE32(1, vd, rs, mask); +} + +void Assembler::VLE64(Vec vd, GPR rs, VecMask mask) noexcept { + VLSEGE64(1, vd, rs, mask); +} + +void Assembler::VLM(Vec vd, GPR rs) noexcept { + EmitVectorLoad(m_buffer, 0b000, false, AddressingMode::UnitStride, VecMask::No, + UnitStrideLoadAddressingMode::MaskLoad, rs, WidthEncoding::E8, vd); +} + +void Assembler::VLSE8(Vec vd, GPR rs1, GPR rs2, VecMask mask) noexcept { + VLSSEGE8(1, vd, rs1, rs2, mask); +} + +void Assembler::VLSE16(Vec vd, GPR rs1, GPR rs2, VecMask mask) noexcept { + VLSSEGE16(1, vd, rs1, rs2, mask); +} + +void Assembler::VLSE32(Vec vd, GPR rs1, GPR rs2, VecMask mask) noexcept { + VLSSEGE32(1, vd, rs1, rs2, mask); +} + +void Assembler::VLSE64(Vec vd, GPR rs1, GPR rs2, VecMask mask) noexcept { + VLSSEGE64(1, vd, rs1, rs2, mask); +} + +void Assembler::VLOXEI8(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VLOXSEGEI8(1, vd, rs, vs, mask); +} + +void Assembler::VLOXEI16(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VLOXSEGEI16(1, vd, rs, vs, mask); +} + +void Assembler::VLOXEI32(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VLOXSEGEI32(1, vd, rs, vs, mask); +} + +void Assembler::VLOXEI64(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VLOXSEGEI64(1, vd, rs, vs, mask); +} + +void Assembler::VLUXEI8(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VLUXSEGEI8(1, vd, rs, vs, mask); +} + +void Assembler::VLUXEI16(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { 
+ VLUXSEGEI16(1, vd, rs, vs, mask); +} + +void Assembler::VLUXEI32(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VLUXSEGEI32(1, vd, rs, vs, mask); +} + +void Assembler::VLUXEI64(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VLUXSEGEI64(1, vd, rs, vs, mask); +} + +void Assembler::VLE8FF(Vec vd, GPR rs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, 0b000, false, AddressingMode::UnitStride, mask, + UnitStrideLoadAddressingMode::LoadFaultOnlyFirst, rs, WidthEncoding::E8, vd); +} + +void Assembler::VLE16FF(Vec vd, GPR rs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, 0b000, false, AddressingMode::UnitStride, mask, + UnitStrideLoadAddressingMode::LoadFaultOnlyFirst, rs, WidthEncoding::E16, vd); +} + +void Assembler::VLE32FF(Vec vd, GPR rs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, 0b000, false, AddressingMode::UnitStride, mask, + UnitStrideLoadAddressingMode::LoadFaultOnlyFirst, rs, WidthEncoding::E32, vd); +} + +void Assembler::VLE64FF(Vec vd, GPR rs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, 0b000, false, AddressingMode::UnitStride, mask, + UnitStrideLoadAddressingMode::LoadFaultOnlyFirst, rs, WidthEncoding::E64, vd); +} + +void Assembler::VLSEGE8(uint32_t num_segments, Vec vd, GPR rs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::UnitStride, mask, + UnitStrideLoadAddressingMode::Load, rs, WidthEncoding::E8, vd); +} + +void Assembler::VLSEGE16(uint32_t num_segments, Vec vd, GPR rs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::UnitStride, mask, + UnitStrideLoadAddressingMode::Load, rs, WidthEncoding::E16, vd); +} + +void Assembler::VLSEGE32(uint32_t num_segments, Vec vd, GPR rs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::UnitStride, mask, + UnitStrideLoadAddressingMode::Load, rs, WidthEncoding::E32, vd); +} + +void Assembler::VLSEGE64(uint32_t num_segments, Vec vd, GPR rs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::UnitStride, mask, + UnitStrideLoadAddressingMode::Load, rs, WidthEncoding::E64, vd); +} + +void Assembler::VLSSEGE8(uint32_t num_segments, Vec vd, GPR rs1, GPR rs2, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::Strided, mask, + rs2, rs1, WidthEncoding::E8, vd); +} + +void Assembler::VLSSEGE16(uint32_t num_segments, Vec vd, GPR rs1, GPR rs2, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::Strided, mask, + rs2, rs1, WidthEncoding::E16, vd); +} + +void Assembler::VLSSEGE32(uint32_t num_segments, Vec vd, GPR rs1, GPR rs2, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::Strided, mask, + rs2, rs1, WidthEncoding::E32, vd); +} + +void Assembler::VLSSEGE64(uint32_t num_segments, Vec vd, GPR rs1, GPR rs2, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::Strided, mask, + rs2, rs1, WidthEncoding::E64, vd); +} + +void Assembler::VLOXSEGEI8(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::IndexedOrdered, mask, + vs, rs, WidthEncoding::E8, vd); +} + +void Assembler::VLOXSEGEI16(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::IndexedOrdered, mask, + vs, rs, WidthEncoding::E16, vd); +} + +void Assembler::VLOXSEGEI32(uint32_t num_segments, Vec vd, GPR 
rs, Vec vs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::IndexedOrdered, mask, + vs, rs, WidthEncoding::E32, vd); +} + +void Assembler::VLOXSEGEI64(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::IndexedOrdered, mask, + vs, rs, WidthEncoding::E64, vd); +} + +void Assembler::VLUXSEGEI8(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::IndexedUnordered, mask, + vs, rs, WidthEncoding::E8, vd); +} + +void Assembler::VLUXSEGEI16(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::IndexedUnordered, mask, + vs, rs, WidthEncoding::E16, vd); +} + +void Assembler::VLUXSEGEI32(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::IndexedUnordered, mask, + vs, rs, WidthEncoding::E32, vd); +} + +void Assembler::VLUXSEGEI64(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorLoad(m_buffer, num_segments, false, AddressingMode::IndexedUnordered, mask, + vs, rs, WidthEncoding::E64, vd); +} + +void Assembler::VLRE8(uint32_t num_registers, Vec vd, GPR rs) noexcept { + BISCUIT_ASSERT(vd.Index() % num_registers == 0); + EmitVectorLoadWholeReg(m_buffer, num_registers, false, rs, WidthEncoding::E8, vd); +} + +void Assembler::VL1RE8(Vec vd, GPR rs) noexcept { + VLRE8(1, vd, rs); +} + +void Assembler::VL2RE8(Vec vd, GPR rs) noexcept { + VLRE8(2, vd, rs); +} + +void Assembler::VL4RE8(Vec vd, GPR rs) noexcept { + VLRE8(4, vd, rs); +} + +void Assembler::VL8RE8(Vec vd, GPR rs) noexcept { + VLRE8(8, vd, rs); +} + +void Assembler::VLRE16(uint32_t num_registers, Vec vd, GPR rs) noexcept { + BISCUIT_ASSERT(vd.Index() % num_registers == 0); + EmitVectorLoadWholeReg(m_buffer, num_registers, false, rs, WidthEncoding::E16, vd); +} + +void Assembler::VL1RE16(Vec vd, GPR rs) noexcept { + VLRE16(1, vd, rs); +} + +void Assembler::VL2RE16(Vec vd, GPR rs) noexcept { + VLRE16(2, vd, rs); +} + +void Assembler::VL4RE16(Vec vd, GPR rs) noexcept { + VLRE16(4, vd, rs); +} + +void Assembler::VL8RE16(Vec vd, GPR rs) noexcept { + VLRE16(8, vd, rs); +} + +void Assembler::VLRE32(uint32_t num_registers, Vec vd, GPR rs) noexcept { + BISCUIT_ASSERT(vd.Index() % num_registers == 0); + EmitVectorLoadWholeReg(m_buffer, num_registers, false, rs, WidthEncoding::E32, vd); +} + +void Assembler::VL1RE32(Vec vd, GPR rs) noexcept { + VLRE32(1, vd, rs); +} + +void Assembler::VL2RE32(Vec vd, GPR rs) noexcept { + VLRE32(2, vd, rs); +} + +void Assembler::VL4RE32(Vec vd, GPR rs) noexcept { + VLRE32(4, vd, rs); +} + +void Assembler::VL8RE32(Vec vd, GPR rs) noexcept { + VLRE32(8, vd, rs); +} + +void Assembler::VLRE64(uint32_t num_registers, Vec vd, GPR rs) noexcept { + BISCUIT_ASSERT(vd.Index() % num_registers == 0); + EmitVectorLoadWholeReg(m_buffer, num_registers, false, rs, WidthEncoding::E64, vd); +} + +void Assembler::VL1RE64(Vec vd, GPR rs) noexcept { + VLRE64(1, vd, rs); +} + +void Assembler::VL2RE64(Vec vd, GPR rs) noexcept { + VLRE64(2, vd, rs); +} + +void Assembler::VL4RE64(Vec vd, GPR rs) noexcept { + VLRE64(4, vd, rs); +} + +void Assembler::VL8RE64(Vec vd, GPR rs) noexcept { + VLRE64(8, vd, rs); +} + +void Assembler::VSE8(Vec vs, GPR rs, VecMask mask) noexcept { + VSSEGE8(1, vs, rs, mask); +} + +void Assembler::VSE16(Vec vs, GPR rs, VecMask 
mask) noexcept { + VSSEGE16(1, vs, rs, mask); +} + +void Assembler::VSE32(Vec vs, GPR rs, VecMask mask) noexcept { + VSSEGE32(1, vs, rs, mask); +} + +void Assembler::VSE64(Vec vs, GPR rs, VecMask mask) noexcept { + VSSEGE64(1, vs, rs, mask); +} + +void Assembler::VSM(Vec vs, GPR rs) noexcept { + EmitVectorStore(m_buffer, 0b000, false, AddressingMode::UnitStride, VecMask::No, + UnitStrideStoreAddressingMode::MaskStore, rs, WidthEncoding::E8, vs); +} + +void Assembler::VSSE8(Vec vs, GPR rs1, GPR rs2, VecMask mask) noexcept { + VSSSEGE8(1, vs, rs1, rs2, mask); +} + +void Assembler::VSSE16(Vec vs, GPR rs1, GPR rs2, VecMask mask) noexcept { + VSSSEGE16(1, vs, rs1, rs2, mask); +} + +void Assembler::VSSE32(Vec vs, GPR rs1, GPR rs2, VecMask mask) noexcept { + VSSSEGE32(1, vs, rs1, rs2, mask); +} + +void Assembler::VSSE64(Vec vs, GPR rs1, GPR rs2, VecMask mask) noexcept { + VSSSEGE64(1, vs, rs1, rs2, mask); +} + +void Assembler::VSOXEI8(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VSOXSEGEI8(1, vd, rs, vs, mask); +} + +void Assembler::VSOXEI16(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VSOXSEGEI16(1, vd, rs, vs, mask); +} + +void Assembler::VSOXEI32(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VSOXSEGEI32(1, vd, rs, vs, mask); +} + +void Assembler::VSOXEI64(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VSOXSEGEI64(1, vd, rs, vs, mask); +} + +void Assembler::VSUXEI8(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VSUXSEGEI8(1, vd, rs, vs, mask); +} + +void Assembler::VSUXEI16(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VSUXSEGEI16(1, vd, rs, vs, mask); +} + +void Assembler::VSUXEI32(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VSUXSEGEI32(1, vd, rs, vs, mask); +} + +void Assembler::VSUXEI64(Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + VSUXSEGEI64(1, vd, rs, vs, mask); +} + +void Assembler::VSSEGE8(uint32_t num_segments, Vec vs, GPR rs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::UnitStride, mask, + UnitStrideStoreAddressingMode::Store, rs, WidthEncoding::E8, vs); +} + +void Assembler::VSSEGE16(uint32_t num_segments, Vec vs, GPR rs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::UnitStride, mask, + UnitStrideStoreAddressingMode::Store, rs, WidthEncoding::E16, vs); +} + +void Assembler::VSSEGE32(uint32_t num_segments, Vec vs, GPR rs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::UnitStride, mask, + UnitStrideStoreAddressingMode::Store, rs, WidthEncoding::E32, vs); +} + +void Assembler::VSSEGE64(uint32_t num_segments, Vec vs, GPR rs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::UnitStride, mask, + UnitStrideStoreAddressingMode::Store, rs, WidthEncoding::E64, vs); +} + +void Assembler::VSSSEGE8(uint32_t num_segments, Vec vs, GPR rs1, GPR rs2, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::Strided, mask, + rs2, rs1, WidthEncoding::E8, vs); +} + +void Assembler::VSSSEGE16(uint32_t num_segments, Vec vs, GPR rs1, GPR rs2, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::Strided, mask, + rs2, rs1, WidthEncoding::E16, vs); +} + +void Assembler::VSSSEGE32(uint32_t num_segments, Vec vs, GPR rs1, GPR rs2, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::Strided, mask, + rs2, rs1, WidthEncoding::E32, vs); +} + +void Assembler::VSSSEGE64(uint32_t num_segments, Vec vs, 
GPR rs1, GPR rs2, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::Strided, mask, + rs2, rs1, WidthEncoding::E64, vs); +} + +void Assembler::VSOXSEGEI8(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::IndexedOrdered, mask, + vs, rs, WidthEncoding::E8, vd); +} + +void Assembler::VSOXSEGEI16(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::IndexedOrdered, mask, + vs, rs, WidthEncoding::E16, vd); +} + +void Assembler::VSOXSEGEI32(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::IndexedOrdered, mask, + vs, rs, WidthEncoding::E32, vd); +} + +void Assembler::VSOXSEGEI64(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::IndexedOrdered, mask, + vs, rs, WidthEncoding::E64, vd); +} + +void Assembler::VSUXSEGEI8(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::IndexedUnordered, mask, + vs, rs, WidthEncoding::E8, vd); +} + +void Assembler::VSUXSEGEI16(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::IndexedUnordered, mask, + vs, rs, WidthEncoding::E16, vd); +} + +void Assembler::VSUXSEGEI32(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::IndexedUnordered, mask, + vs, rs, WidthEncoding::E32, vd); +} + +void Assembler::VSUXSEGEI64(uint32_t num_segments, Vec vd, GPR rs, Vec vs, VecMask mask) noexcept { + EmitVectorStore(m_buffer, num_segments, false, AddressingMode::IndexedUnordered, mask, + vs, rs, WidthEncoding::E64, vd); +} + +void Assembler::VSR(uint32_t num_registers, Vec vs, GPR rs) noexcept { + EmitVectorStoreWholeReg(m_buffer, num_registers, rs, vs); +} + +void Assembler::VS1R(Vec vs, GPR rs) noexcept { + VSR(1, vs, rs); +} + +void Assembler::VS2R(Vec vs, GPR rs) noexcept { + BISCUIT_ASSERT(vs.Index() % 2 == 0); + VSR(2, vs, rs); +} + +void Assembler::VS4R(Vec vs, GPR rs) noexcept { + BISCUIT_ASSERT(vs.Index() % 4 == 0); + VSR(4, vs, rs); +} + +void Assembler::VS8R(Vec vs, GPR rs) noexcept { + BISCUIT_ASSERT(vs.Index() % 8 == 0); + VSR(8, vs, rs); +} + +void Assembler::VSETIVLI(GPR rd, uint32_t imm, SEW sew, LMUL lmul, VTA vta, VMA vma) noexcept { + // Immediate must be able to fit in 5 bits. 
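+    // Worked example of the zimm layout constructed below, assuming the
+    // enum values mirror the vtype fields (e.g. SEW::E32 == 0b010,
+    // LMUL::M1 == 0b000, and the tail/mask agnostic settings encode as 1):
+    // VSETIVLI(x10, 16, SEW::E32, LMUL::M1, VTA::Yes, VMA::Yes) builds
+    // zimm = 0b000 | (0b010 << 3) | (1 << 6) | (1 << 7) = 0b11010000.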
+    BISCUIT_ASSERT(imm <= 31);
+
+    // clang-format off
+    const auto zimm = static_cast<uint32_t>(lmul) |
+                      (static_cast<uint32_t>(sew) << 3) |
+                      (static_cast<uint32_t>(vta) << 6) |
+                      (static_cast<uint32_t>(vma) << 7);
+    // clang-format on
+
+    m_buffer.Emit32(0xC0007057U | (zimm << 20) | (imm << 15) | (rd.Index() << 7));
+}
+
+void Assembler::VSETVL(GPR rd, GPR rs1, GPR rs2) noexcept {
+    m_buffer.Emit32(0x80007057U | (rs2.Index() << 20) | (rs1.Index() << 15) | (rd.Index() << 7));
+}
+
+void Assembler::VSETVLI(GPR rd, GPR rs, SEW sew, LMUL lmul, VTA vta, VMA vma) noexcept {
+    // clang-format off
+    const auto zimm = static_cast<uint32_t>(lmul) |
+                      (static_cast<uint32_t>(sew) << 3) |
+                      (static_cast<uint32_t>(vta) << 6) |
+                      (static_cast<uint32_t>(vma) << 7);
+    // clang-format on
+
+    m_buffer.Emit32(0x00007057U | (zimm << 20) | (rs.Index() << 15) | (rd.Index() << 7));
+}
+
+// Vector Cryptography Instructions
+
+void Assembler::VANDN(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept {
+    EmitVectorOPIVV(m_buffer, 0b000001, mask, vs2, vs1, vd);
+}
+void Assembler::VANDN(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept {
+    EmitVectorOPIVX(m_buffer, 0b000001, mask, vs2, rs1, vd);
+}
+
+void Assembler::VBREV(Vec vd, Vec vs2, VecMask mask) noexcept {
+    EmitVectorOPMVV(m_buffer, 0b010010, mask, vs2, Vec{0b01010}, vd);
+}
+void Assembler::VBREV8(Vec vd, Vec vs2, VecMask mask) noexcept {
+    EmitVectorOPMVV(m_buffer, 0b010010, mask, vs2, Vec{0b01000}, vd);
+}
+void Assembler::VREV8(Vec vd, Vec vs2, VecMask mask) noexcept {
+    EmitVectorOPMVV(m_buffer, 0b010010, mask, vs2, Vec{0b01001}, vd);
+}
+
+void Assembler::VCLZ(Vec vd, Vec vs2, VecMask mask) noexcept {
+    EmitVectorOPMVV(m_buffer, 0b010010, mask, vs2, Vec{0b01100}, vd);
+}
+void Assembler::VCTZ(Vec vd, Vec vs2, VecMask mask) noexcept {
+    EmitVectorOPMVV(m_buffer, 0b010010, mask, vs2, Vec{0b01101}, vd);
+}
+void Assembler::VCPOP(Vec vd, Vec vs2, VecMask mask) noexcept {
+    EmitVectorOPMVV(m_buffer, 0b010010, mask, vs2, Vec{0b01110}, vd);
+}
+
+void Assembler::VROL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept {
+    EmitVectorOPIVV(m_buffer, 0b010101, mask, vs2, vs1, vd);
+}
+void Assembler::VROL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept {
+    EmitVectorOPIVX(m_buffer, 0b010101, mask, vs2, rs1, vd);
+}
+
+void Assembler::VROR(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept {
+    EmitVectorOPIVV(m_buffer, 0b010100, mask, vs2, vs1, vd);
+}
+void Assembler::VROR(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept {
+    EmitVectorOPIVX(m_buffer, 0b010100, mask, vs2, rs1, vd);
+}
+void Assembler::VROR(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept {
+    BISCUIT_ASSERT(uimm <= 63);
+
+    // Bit 5 of the rotate amount is carried in the low bit of funct6;
+    // the low five bits travel in the usual immediate field.
+    const auto funct6 = 0b010100 | ((uimm & 0b100000) >> 5);
+    EmitVectorOPIVIImpl(m_buffer, funct6, mask, vs2, uimm, vd);
+}
+
+void Assembler::VWSLL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept {
+    EmitVectorOPIVV(m_buffer, 0b110101, mask, vs2, vs1, vd);
+}
+void Assembler::VWSLL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept {
+    EmitVectorOPIVX(m_buffer, 0b110101, mask, vs2, rs1, vd);
+}
+void Assembler::VWSLL(Vec vd, Vec vs2, uint32_t uimm, VecMask mask) noexcept {
+    EmitVectorOPIVUI(m_buffer, 0b110101, mask, vs2, uimm, vd);
+}
+
+void Assembler::VCLMUL(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept {
+    EmitVectorOPMVV(m_buffer, 0b001100, mask, vs2, vs1, vd);
+}
+void Assembler::VCLMUL(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept {
+    EmitVectorOPMVX(m_buffer, 0b001100, mask, vs2, rs1, vd);
+}
+
+void Assembler::VCLMULH(Vec vd, Vec vs2, Vec vs1, VecMask mask) noexcept {
+    EmitVectorOPMVV(m_buffer, 0b001101, mask, vs2, vs1, vd);
+}
+void Assembler::VCLMULH(Vec vd, Vec vs2, GPR rs1, VecMask mask) noexcept {
+    EmitVectorOPMVX(m_buffer, 0b001101, mask, vs2, rs1, vd);
+}
+
+void Assembler::VGHSH(Vec vd, Vec vs2, Vec vs1) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101100, VecMask::No, vs2, vs1, vd);
+}
+void Assembler::VGMUL(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101000, VecMask::No, vs2, Vec{0b10001}, vd);
+}
+
+void Assembler::VAESDF_VV(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101000, VecMask::No, vs2, Vec{0b00001}, vd);
+}
+void Assembler::VAESDF_VS(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101001, VecMask::No, vs2, Vec{0b00001}, vd);
+}
+
+void Assembler::VAESDM_VV(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101000, VecMask::No, vs2, Vec{0}, vd);
+}
+void Assembler::VAESDM_VS(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101001, VecMask::No, vs2, Vec{0}, vd);
+}
+
+void Assembler::VAESEF_VV(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101000, VecMask::No, vs2, Vec{0b00011}, vd);
+}
+void Assembler::VAESEF_VS(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101001, VecMask::No, vs2, Vec{0b00011}, vd);
+}
+
+void Assembler::VAESEM_VV(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101000, VecMask::No, vs2, Vec{0b00010}, vd);
+}
+void Assembler::VAESEM_VS(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101001, VecMask::No, vs2, Vec{0b00010}, vd);
+}
+
+// A little bit of weirdness (at first glance) with these two is that the
+// round number immediate has valid ranges:
+//
+// - [1, 10] for VAESKF1
+// - [2, 14] for VAESKF2
+//
+// Any out-of-range value (0 or 11-15 for VAESKF1; 0, 1, or 15 for VAESKF2)
+// will be re-encoded into the valid range by inverting bit uimm[3].
+
+void Assembler::VAESKF1(Vec vd, Vec vs2, uint32_t uimm) noexcept {
+    BISCUIT_ASSERT(uimm <= 15);
+
+    if (uimm == 0 || uimm > 10) {
+        uimm ^= 0b1000;
+    }
+
+    EmitVectorOPMVVP(m_buffer, 0b100010, VecMask::No, vs2, Vec{uimm}, vd);
+}
+void Assembler::VAESKF2(Vec vd, Vec vs2, uint32_t uimm) noexcept {
+    BISCUIT_ASSERT(uimm <= 15);
+
+    if (uimm < 2 || uimm > 14) {
+        uimm ^= 0b1000;
+    }
+
+    EmitVectorOPMVVP(m_buffer, 0b101010, VecMask::No, vs2, Vec{uimm}, vd);
+}
+
+void Assembler::VAESZ(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101001, VecMask::No, vs2, Vec{0b00111}, vd);
+}
+
+void Assembler::VSHA2MS(Vec vd, Vec vs2, Vec vs1) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101101, VecMask::No, vs2, vs1, vd);
+}
+void Assembler::VSHA2CH(Vec vd, Vec vs2, Vec vs1) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101110, VecMask::No, vs2, vs1, vd);
+}
+void Assembler::VSHA2CL(Vec vd, Vec vs2, Vec vs1) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101111, VecMask::No, vs2, vs1, vd);
+}
+
+void Assembler::VSM4K(Vec vd, Vec vs2, uint32_t uimm) noexcept {
+    BISCUIT_ASSERT(uimm <= 7);
+    EmitVectorOPMVVP(m_buffer, 0b100001, VecMask::No, vs2, Vec{uimm}, vd);
+}
+
+void Assembler::VSM4R_VV(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101000, VecMask::No, vs2, Vec{0b10000}, vd);
+}
+void Assembler::VSM4R_VS(Vec vd, Vec vs2) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b101001, VecMask::No, vs2, Vec{0b10000}, vd);
+}
+
+void Assembler::VSM3C(Vec vd, Vec vs2, uint32_t uimm) noexcept {
+    BISCUIT_ASSERT(uimm <= 31);
+    EmitVectorOPMVVP(m_buffer, 0b101011, VecMask::No, vs2, Vec{uimm}, vd);
+}
+void Assembler::VSM3ME(Vec vd, Vec vs2, Vec vs1) noexcept {
+    EmitVectorOPMVVP(m_buffer, 0b100000, VecMask::No, vs2, vs1, vd);
+}
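+
+// As a concrete (hypothetical) illustration of the VAESKF1/VAESKF2
+// re-encoding described above: VAESKF1(v4, v8, 11) is emitted with round
+// number 11 ^ 0b1000 == 3, while the in-range VAESKF1(v4, v8, 10) is
+// emitted unchanged.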
+// Zvfbfmin, Zvfbfwma Extension Instructions
+
+void Assembler::VFNCVTBF16_F_F_W(Vec vd, Vec vs, VecMask mask) noexcept {
+    EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v29, vd);
+}
+void Assembler::VFWCVTBF16_F_F_V(Vec vd, Vec vs, VecMask mask) noexcept {
+    EmitVectorOPFVV(m_buffer, 0b010010, mask, vs, v13, vd);
+}
+
+void Assembler::VFWMACCBF16(Vec vd, FPR rs1, Vec vs2, VecMask mask) noexcept {
+    EmitVectorOPFVF(m_buffer, 0b111011, mask, vs2, rs1, vd);
+}
+void Assembler::VFWMACCBF16(Vec vd, Vec vs1, Vec vs2, VecMask mask) noexcept {
+    EmitVectorOPFVV(m_buffer, 0b111011, mask, vs2, vs1, vd);
+}
+
+} // namespace biscuit
diff --git a/src/code_buffer.cpp b/src/code_buffer.cpp
new file mode 100644
index 00000000..386be375
--- /dev/null
+++ b/src/code_buffer.cpp
@@ -0,0 +1,111 @@
+#include <biscuit/assert.hpp>
+#include <biscuit/code_buffer.hpp>
+
+#include <cstring>
+#include <utility>
+
+#ifdef BISCUIT_CODE_BUFFER_MMAP
+#include <sys/mman.h>
+#endif
+
+namespace biscuit {
+
+CodeBuffer::CodeBuffer(size_t capacity)
+    : m_capacity{capacity}, m_is_managed{true} {
+    if (capacity == 0) {
+        return;
+    }
+
+#ifdef BISCUIT_CODE_BUFFER_MMAP
+    m_buffer = static_cast<uint8_t*>(mmap(nullptr, capacity,
+                                          PROT_READ | PROT_WRITE,
+                                          MAP_PRIVATE | MAP_ANONYMOUS,
+                                          -1, 0));
+    // mmap signals failure with MAP_FAILED rather than a null pointer.
+    BISCUIT_ASSERT(m_buffer != MAP_FAILED);
+#else
+    m_buffer = new uint8_t[capacity]();
+#endif
+
+    m_cursor = m_buffer;
+}
+
+CodeBuffer::CodeBuffer(uint8_t* buffer, size_t capacity)
+    : m_buffer{buffer}, m_cursor{buffer}, m_capacity{capacity} {
+    BISCUIT_ASSERT(buffer != nullptr);
+}
+
+CodeBuffer::CodeBuffer(CodeBuffer&& other) noexcept
+    : m_buffer{std::exchange(other.m_buffer, nullptr)}
+    , m_cursor{std::exchange(other.m_cursor, nullptr)}
+    , m_capacity{std::exchange(other.m_capacity, size_t{0})}
+    , m_is_managed{std::exchange(other.m_is_managed, false)} {}
+
+CodeBuffer& CodeBuffer::operator=(CodeBuffer&& other) noexcept {
+    if (this == &other) {
+        return *this;
+    }
+
+    std::swap(m_buffer, other.m_buffer);
+    std::swap(m_cursor, other.m_cursor);
+    std::swap(m_capacity, other.m_capacity);
+    std::swap(m_is_managed, other.m_is_managed);
+    return *this;
+}
+
+CodeBuffer::~CodeBuffer() noexcept {
+    if (!m_is_managed) {
+        return;
+    }
+
+#ifdef BISCUIT_CODE_BUFFER_MMAP
+    munmap(m_buffer, m_capacity);
+#else
+    delete[] m_buffer;
+#endif
+}
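+
+// A minimal usage sketch of the growth path (hypothetical caller;
+// GetRemainingBytes() and GetCapacity() are assumed from the public
+// CodeBuffer interface):
+//
+//     CodeBuffer buffer{4096};
+//     if (buffer.GetRemainingBytes() < sizeof(uint32_t)) {
+//         buffer.Grow(buffer.GetCapacity() * 2);
+//     }
+//     buffer.Emit32(0x00000013U); // addi x0, x0, 0 (the canonical NOP)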
+void CodeBuffer::Grow(size_t new_capacity) {
+    BISCUIT_ASSERT(IsManaged());
+
+    // No-op, just return.
+    if (new_capacity <= m_capacity) {
+        return;
+    }
+
+    const auto cursor_offset = GetCursorOffset();
+
+#ifdef BISCUIT_CODE_BUFFER_MMAP
+    auto* new_buffer = static_cast<uint8_t*>(mremap(m_buffer, m_capacity, new_capacity, MREMAP_MAYMOVE));
+    // Like mmap, mremap reports failure with MAP_FAILED.
+    BISCUIT_ASSERT(new_buffer != MAP_FAILED);
+#else
+    auto* new_buffer = new uint8_t[new_capacity]();
+    std::memcpy(new_buffer, m_buffer, m_capacity);
+    delete[] m_buffer;
+#endif
+
+    m_buffer = new_buffer;
+    m_capacity = new_capacity;
+    m_cursor = m_buffer + cursor_offset;
+}
+
+void CodeBuffer::SetExecutable() {
+#ifdef BISCUIT_CODE_BUFFER_MMAP
+    const auto result = mprotect(m_buffer, m_capacity, PROT_READ | PROT_EXEC);
+    BISCUIT_ASSERT(result == 0);
+#else
+    // Unimplemented/unnecessary for new[]-backed buffers
+    BISCUIT_ASSERT(false);
+#endif
+}
+
+void CodeBuffer::SetWritable() {
+#ifdef BISCUIT_CODE_BUFFER_MMAP
+    const auto result = mprotect(m_buffer, m_capacity, PROT_READ | PROT_WRITE);
+    BISCUIT_ASSERT(result == 0);
+#else
+    // Unimplemented/unnecessary for new[]-backed buffers
+    BISCUIT_ASSERT(false);
+#endif
+}
+
+} // namespace biscuit
diff --git a/src/cpuinfo.cpp b/src/cpuinfo.cpp
new file mode 100644
index 00000000..b0944603
--- /dev/null
+++ b/src/cpuinfo.cpp
@@ -0,0 +1,39 @@
+// Copyright (c), 2022, KNS Group LLC (YADRO)
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include <biscuit/cpuinfo.hpp>
+
+namespace biscuit {
+
+bool CPUInfo::Has(RISCVExtension extension) const {
+#if defined(__linux__) && defined(__riscv)
+    const static uint64_t features = getauxval(AT_HWCAP) & (
+        COMPAT_HWCAP_ISA_I |
+        COMPAT_HWCAP_ISA_M |
+        COMPAT_HWCAP_ISA_A |
+        COMPAT_HWCAP_ISA_F |
+        COMPAT_HWCAP_ISA_D |
+        COMPAT_HWCAP_ISA_C |
+        COMPAT_HWCAP_ISA_V
+    );
+#else
+    const static uint64_t features = 0;
+#endif
+
+    return (features & static_cast<uint64_t>(extension)) != 0;
+}
+
+uint32_t CPUInfo::GetVlenb() const {
+    if (Has(RISCVExtension::V)) {
+        static CSRReader<CSR::VLenb> csrReader;
+        const static auto getVLEN = csrReader.GetCode();
+        return getVLEN();
+    }
+
+    return 0;
+}
+
+} // namespace biscuit
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 00000000..caaaed45
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,76 @@
+project(biscuit_tests)
+
+add_executable(${PROJECT_NAME}
+    src/assembler_bfloat_tests.cpp
+    src/assembler_branch_tests.cpp
+    src/assembler_cmo_tests.cpp
+    src/assembler_privileged_tests.cpp
+    src/assembler_rv32i_tests.cpp
+    src/assembler_rv64i_tests.cpp
+    src/assembler_rva_tests.cpp
+    src/assembler_rvb_tests.cpp
+    src/assembler_rvc_tests.cpp
+    src/assembler_rvd_tests.cpp
+    src/assembler_rvf_tests.cpp
+    src/assembler_rvk_tests.cpp
+    src/assembler_rvm_tests.cpp
+    src/assembler_rvq_tests.cpp
+    src/assembler_rvv_tests.cpp
+    src/assembler_vector_crypto_tests.cpp
+    src/assembler_zacas_tests.cpp
+    src/assembler_zawrs_tests.cpp
+    src/assembler_zc_tests.cpp
+    src/assembler_zfa_tests.cpp
+    src/assembler_zicond_tests.cpp
+    src/assembler_zicsr_tests.cpp
+    src/assembler_zihintntl_tests.cpp
+    src/main.cpp
+
+    src/assembler_test_utils.hpp
+)
+
+target_include_directories(${PROJECT_NAME}
+PRIVATE
+    externals/
+)
+
+target_link_libraries(${PROJECT_NAME}
+PRIVATE
+    biscuit
+)
+
+target_compile_features(${PROJECT_NAME}
+PRIVATE
+    cxx_std_20
+)
+
+if (MSVC)
+    target_compile_options(${PROJECT_NAME}
+    PRIVATE
+        /MP
+        /Zi
+        /Zo
+        /permissive-
+        /EHsc
+        /utf-8
+        /volatile:iso
+        /Zc:externConstexpr
+        /Zc:inline
+        /Zc:throwingNew
+
+        # Warnings
+        /W4
+        /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
+        /we4101 # 'identifier': unreferenced local variable
+        /we4265 # 'class': class has virtual functions, but destructor is not virtual
+        /we4388 # signed/unsigned mismatch
+        /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
+        /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
+        /we4555 # Expression has no effect; expected expression with side-effect
+        /we4715 # 'function': not all control paths return a value
+        /we4834 # Discarding return value of function with 'nodiscard' attribute
+        /we5038 # data member 'member1' will be initialized after data member 'member2'
+    )
+endif()
+
+add_test(biscuit_tests_ctest ${PROJECT_NAME})
\ No newline at end of file
diff --git a/tests/externals/catch/catch.hpp b/tests/externals/catch/catch.hpp
new file mode 100644
index 00000000..9b309bdd
--- /dev/null
+++ b/tests/externals/catch/catch.hpp
@@ -0,0 +1,17976 @@
+/*
+ * Catch v2.13.10
+ * Generated: 2022-10-16 11:01:23.452308
+ * ----------------------------------------------------------
+ * This file has been merged from multiple headers. Please don't edit it directly
+ * Copyright (c) 2022 Two Blue Cubes Ltd. All rights reserved.
+ *
+ * Distributed under the Boost Software License, Version 1.0. (See accompanying
+ * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+#ifndef TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED
+#define TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED
+// start catch.hpp
+
+#define CATCH_VERSION_MAJOR 2
+#define CATCH_VERSION_MINOR 13
+#define CATCH_VERSION_PATCH 10
+
+#ifdef __clang__
+# pragma clang system_header
+#elif defined __GNUC__
+# pragma GCC system_header
+#endif
+
+// start catch_suppress_warnings.h
+
+#ifdef __clang__
+# ifdef __ICC // icpc defines the __clang__ macro
+# pragma warning(push)
+# pragma warning(disable: 161 1682)
+# else // __ICC
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wpadded"
+# pragma clang diagnostic ignored "-Wswitch-enum"
+# pragma clang diagnostic ignored "-Wcovered-switch-default"
+# endif
+#elif defined __GNUC__
+ // Because REQUIREs trigger GCC's -Wparentheses, and because still
+ // supported versions of g++ have only buggy support for _Pragmas,
+ // Wparentheses have to be suppressed globally.
+# pragma GCC diagnostic ignored "-Wparentheses" // See #674 for details
+
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wunused-variable"
+# pragma GCC diagnostic ignored "-Wpadded"
+#endif
+// end catch_suppress_warnings.h
+#if defined(CATCH_CONFIG_MAIN) || defined(CATCH_CONFIG_RUNNER)
+# define CATCH_IMPL
+# define CATCH_CONFIG_ALL_PARTS
+#endif
+
+// In the impl file, we want to have access to all parts of the headers
+// Can also be used to sanely support PCHs
+#if defined(CATCH_CONFIG_ALL_PARTS)
+# define CATCH_CONFIG_EXTERNAL_INTERFACES
+# if defined(CATCH_CONFIG_DISABLE_MATCHERS)
+# undef CATCH_CONFIG_DISABLE_MATCHERS
+# endif
+# if !defined(CATCH_CONFIG_ENABLE_CHRONO_STRINGMAKER)
+# define CATCH_CONFIG_ENABLE_CHRONO_STRINGMAKER
+# endif
+#endif
+
+#if !defined(CATCH_CONFIG_IMPL_ONLY)
+// start catch_platform.h
+
+// See e.g.:
+// https://opensource.apple.com/source/CarbonHeaders/CarbonHeaders-18.1/TargetConditionals.h.auto.html
+#ifdef __APPLE__
+# include <TargetConditionals.h>
+# if (defined(TARGET_OS_OSX) && TARGET_OS_OSX == 1) || \
+     (defined(TARGET_OS_MAC) && TARGET_OS_MAC == 1)
+# define CATCH_PLATFORM_MAC
+# elif (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE == 1)
+# define CATCH_PLATFORM_IPHONE
+# endif
+
+#elif defined(linux) || defined(__linux) || defined(__linux__)
+# define CATCH_PLATFORM_LINUX
+
+#elif defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) || defined(__MINGW32__)
+# define CATCH_PLATFORM_WINDOWS
+#endif
+
+// end catch_platform.h
+
+#ifdef CATCH_IMPL
+# ifndef CLARA_CONFIG_MAIN
+# define CLARA_CONFIG_MAIN_NOT_DEFINED
+# define CLARA_CONFIG_MAIN
+# endif
+#endif
+
+// start catch_user_interfaces.h
+
+namespace Catch {
+    unsigned int rngSeed();
+}
+
+// end catch_user_interfaces.h
+// start catch_tag_alias_autoregistrar.h
+
+// start catch_common.h
+
+// start catch_compiler_capabilities.h
+
+// Detect a number of compiler features - by compiler
+// The following features are defined:
+//
+// CATCH_CONFIG_COUNTER : is the __COUNTER__ macro supported?
+// CATCH_CONFIG_WINDOWS_SEH : is Windows SEH supported?
+// CATCH_CONFIG_POSIX_SIGNALS : are POSIX signals supported?
+// CATCH_CONFIG_DISABLE_EXCEPTIONS : Are exceptions enabled?
+// ****************
+// Note to maintainers: if new toggles are added please document them
+// in configuration.md, too
+// ****************
+
+// In general each macro has a _NO_ form
+// (e.g. CATCH_CONFIG_NO_POSIX_SIGNALS) which disables the feature.
+// Many features, at point of detection, define an _INTERNAL_ macro, so they
+// can be combined, en masse, with the _NO_ forms later.
+
+#ifdef __cplusplus
+
+# if (__cplusplus >= 201402L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
+# define CATCH_CPP14_OR_GREATER
+# endif
+
+# if (__cplusplus >= 201703L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+# define CATCH_CPP17_OR_GREATER
+# endif
+
+#endif
+
+// Only GCC compiler should be used in this block, so other compilers trying to
+// mask themselves as GCC should be ignored.
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__ICC) && !defined(__CUDACC__) && !defined(__LCC__)
+# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma( "GCC diagnostic push" )
+# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma( "GCC diagnostic pop" )
+
+# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) 
(void)__builtin_constant_p(__VA_ARGS__) + +#endif + +#if defined(__clang__) + +# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma( "clang diagnostic push" ) +# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma( "clang diagnostic pop" ) + +// As of this writing, IBM XL's implementation of __builtin_constant_p has a bug +// which results in calls to destructors being emitted for each temporary, +// without a matching initialization. In practice, this can result in something +// like `std::string::~string` being called on an uninitialized value. +// +// For example, this code will likely segfault under IBM XL: +// ``` +// REQUIRE(std::string("12") + "34" == "1234") +// ``` +// +// Therefore, `CATCH_INTERNAL_IGNORE_BUT_WARN` is not implemented. +# if !defined(__ibmxl__) && !defined(__CUDACC__) +# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) (void)__builtin_constant_p(__VA_ARGS__) /* NOLINT(cppcoreguidelines-pro-type-vararg, hicpp-vararg) */ +# endif + +# define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wexit-time-destructors\"" ) \ + _Pragma( "clang diagnostic ignored \"-Wglobal-constructors\"") + +# define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wparentheses\"" ) + +# define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wunused-variable\"" ) + +# define CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wgnu-zero-variadic-macro-arguments\"" ) + +# define CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wunused-template\"" ) + +#endif // __clang__ + +//////////////////////////////////////////////////////////////////////////////// +// Assume that non-Windows platforms support posix signals by default +#if !defined(CATCH_PLATFORM_WINDOWS) + #define CATCH_INTERNAL_CONFIG_POSIX_SIGNALS +#endif + +//////////////////////////////////////////////////////////////////////////////// +// We know some environments not to support full POSIX signals +#if defined(__CYGWIN__) || defined(__QNX__) || defined(__EMSCRIPTEN__) || defined(__DJGPP__) + #define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS +#endif + +#ifdef __OS400__ +# define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS +# define CATCH_CONFIG_COLOUR_NONE +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Android somehow still does not support std::to_string +#if defined(__ANDROID__) +# define CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING +# define CATCH_INTERNAL_CONFIG_ANDROID_LOGWRITE +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Not all Windows environments support SEH properly +#if defined(__MINGW32__) +# define CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH +#endif + +//////////////////////////////////////////////////////////////////////////////// +// PS4 +#if defined(__ORBIS__) +# define CATCH_INTERNAL_CONFIG_NO_NEW_CAPTURE +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Cygwin +#ifdef __CYGWIN__ + +// Required for some versions of Cygwin to declare gettimeofday +// see: http://stackoverflow.com/questions/36901803/gettimeofday-not-declared-in-this-scope-cygwin +# define _BSD_SOURCE +// some versions of cygwin (most) do not support std::to_string. Use the libstd check. 
+// https://gcc.gnu.org/onlinedocs/gcc-4.8.2/libstdc++/api/a01053_source.html line 2812-2813 +# if !((__cplusplus >= 201103L) && defined(_GLIBCXX_USE_C99) \ + && !defined(_GLIBCXX_HAVE_BROKEN_VSWPRINTF)) + +# define CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING + +# endif +#endif // __CYGWIN__ + +//////////////////////////////////////////////////////////////////////////////// +// Visual C++ +#if defined(_MSC_VER) + +// Universal Windows platform does not support SEH +// Or console colours (or console at all...) +# if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) +# define CATCH_CONFIG_COLOUR_NONE +# else +# define CATCH_INTERNAL_CONFIG_WINDOWS_SEH +# endif + +# if !defined(__clang__) // Handle Clang masquerading for msvc + +// MSVC traditional preprocessor needs some workaround for __VA_ARGS__ +// _MSVC_TRADITIONAL == 0 means new conformant preprocessor +// _MSVC_TRADITIONAL == 1 means old traditional non-conformant preprocessor +# if !defined(_MSVC_TRADITIONAL) || (defined(_MSVC_TRADITIONAL) && _MSVC_TRADITIONAL) +# define CATCH_INTERNAL_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR +# endif // MSVC_TRADITIONAL + +// Only do this if we're not using clang on Windows, which uses `diagnostic push` & `diagnostic pop` +# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION __pragma( warning(push) ) +# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION __pragma( warning(pop) ) +# endif // __clang__ + +#endif // _MSC_VER + +#if defined(_REENTRANT) || defined(_MSC_VER) +// Enable async processing, as -pthread is specified or no additional linking is required +# define CATCH_INTERNAL_CONFIG_USE_ASYNC +#endif // _MSC_VER + +//////////////////////////////////////////////////////////////////////////////// +// Check if we are compiled with -fno-exceptions or equivalent +#if defined(__EXCEPTIONS) || defined(__cpp_exceptions) || defined(_CPPUNWIND) +# define CATCH_INTERNAL_CONFIG_EXCEPTIONS_ENABLED +#endif + +//////////////////////////////////////////////////////////////////////////////// +// DJGPP +#ifdef __DJGPP__ +# define CATCH_INTERNAL_CONFIG_NO_WCHAR +#endif // __DJGPP__ + +//////////////////////////////////////////////////////////////////////////////// +// Embarcadero C++Build +#if defined(__BORLANDC__) + #define CATCH_INTERNAL_CONFIG_POLYFILL_ISNAN +#endif + +//////////////////////////////////////////////////////////////////////////////// + +// Use of __COUNTER__ is suppressed during code analysis in +// CLion/AppCode 2017.2.x and former, because __COUNTER__ is not properly +// handled by it. +// Otherwise all supported compilers support COUNTER macro, +// but user still might want to turn it off +#if ( !defined(__JETBRAINS_IDE__) || __JETBRAINS_IDE__ >= 20170300L ) + #define CATCH_INTERNAL_CONFIG_COUNTER +#endif + +//////////////////////////////////////////////////////////////////////////////// + +// RTX is a special version of Windows that is real time. +// This means that it is detected as Windows, but does not provide +// the same set of capabilities as real Windows does. 
+#if defined(UNDER_RTSS) || defined(RTX64_BUILD)
+ #define CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH
+ #define CATCH_INTERNAL_CONFIG_NO_ASYNC
+ #define CATCH_CONFIG_COLOUR_NONE
+#endif
+
+#if !defined(_GLIBCXX_USE_C99_MATH_TR1)
+#define CATCH_INTERNAL_CONFIG_GLOBAL_NEXTAFTER
+#endif
+
+// Various stdlib support checks that require __has_include
+#if defined(__has_include)
+ // Check if string_view is available and usable
+ #if __has_include(<string_view>) && defined(CATCH_CPP17_OR_GREATER)
+ # define CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW
+ #endif
+
+ // Check if optional is available and usable
+ # if __has_include(<optional>) && defined(CATCH_CPP17_OR_GREATER)
+ # define CATCH_INTERNAL_CONFIG_CPP17_OPTIONAL
+ # endif // __has_include(<optional>) && defined(CATCH_CPP17_OR_GREATER)
+
+ // Check if byte is available and usable
+ # if __has_include(<cstddef>) && defined(CATCH_CPP17_OR_GREATER)
+ # include <cstddef>
+ # if defined(__cpp_lib_byte) && (__cpp_lib_byte > 0)
+ # define CATCH_INTERNAL_CONFIG_CPP17_BYTE
+ # endif
+ # endif // __has_include(<cstddef>) && defined(CATCH_CPP17_OR_GREATER)
+
+ // Check if variant is available and usable
+ # if __has_include(<variant>) && defined(CATCH_CPP17_OR_GREATER)
+ # if defined(__clang__) && (__clang_major__ < 8)
+ // work around clang bug with libstdc++ https://bugs.llvm.org/show_bug.cgi?id=31852
+ // fix should be in clang 8, workaround in libstdc++ 8.2
+ # include <ciso646>
+ # if defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9)
+ # define CATCH_CONFIG_NO_CPP17_VARIANT
+ # else
+ # define CATCH_INTERNAL_CONFIG_CPP17_VARIANT
+ # endif // defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9)
+ # else
+ # define CATCH_INTERNAL_CONFIG_CPP17_VARIANT
+ # endif // defined(__clang__) && (__clang_major__ < 8)
+ # endif // __has_include(<variant>) && defined(CATCH_CPP17_OR_GREATER)
+#endif // defined(__has_include)
+
+#if defined(CATCH_INTERNAL_CONFIG_COUNTER) && !defined(CATCH_CONFIG_NO_COUNTER) && !defined(CATCH_CONFIG_COUNTER)
+# define CATCH_CONFIG_COUNTER
+#endif
+#if defined(CATCH_INTERNAL_CONFIG_WINDOWS_SEH) && !defined(CATCH_CONFIG_NO_WINDOWS_SEH) && !defined(CATCH_CONFIG_WINDOWS_SEH) && !defined(CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH)
+# define CATCH_CONFIG_WINDOWS_SEH
+#endif
+// This is set by default, because we assume that unix compilers are posix-signal-compatible by default.
+#if defined(CATCH_INTERNAL_CONFIG_POSIX_SIGNALS) && !defined(CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_POSIX_SIGNALS)
+# define CATCH_CONFIG_POSIX_SIGNALS
+#endif
+// This is set by default, because we assume that compilers with no wchar_t support are just rare exceptions.
+#if !defined(CATCH_INTERNAL_CONFIG_NO_WCHAR) && !defined(CATCH_CONFIG_NO_WCHAR) && !defined(CATCH_CONFIG_WCHAR) +# define CATCH_CONFIG_WCHAR +#endif + +#if !defined(CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING) && !defined(CATCH_CONFIG_NO_CPP11_TO_STRING) && !defined(CATCH_CONFIG_CPP11_TO_STRING) +# define CATCH_CONFIG_CPP11_TO_STRING +#endif + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_OPTIONAL) && !defined(CATCH_CONFIG_NO_CPP17_OPTIONAL) && !defined(CATCH_CONFIG_CPP17_OPTIONAL) +# define CATCH_CONFIG_CPP17_OPTIONAL +#endif + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW) && !defined(CATCH_CONFIG_NO_CPP17_STRING_VIEW) && !defined(CATCH_CONFIG_CPP17_STRING_VIEW) +# define CATCH_CONFIG_CPP17_STRING_VIEW +#endif + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_VARIANT) && !defined(CATCH_CONFIG_NO_CPP17_VARIANT) && !defined(CATCH_CONFIG_CPP17_VARIANT) +# define CATCH_CONFIG_CPP17_VARIANT +#endif + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_BYTE) && !defined(CATCH_CONFIG_NO_CPP17_BYTE) && !defined(CATCH_CONFIG_CPP17_BYTE) +# define CATCH_CONFIG_CPP17_BYTE +#endif + +#if defined(CATCH_CONFIG_EXPERIMENTAL_REDIRECT) +# define CATCH_INTERNAL_CONFIG_NEW_CAPTURE +#endif + +#if defined(CATCH_INTERNAL_CONFIG_NEW_CAPTURE) && !defined(CATCH_INTERNAL_CONFIG_NO_NEW_CAPTURE) && !defined(CATCH_CONFIG_NO_NEW_CAPTURE) && !defined(CATCH_CONFIG_NEW_CAPTURE) +# define CATCH_CONFIG_NEW_CAPTURE +#endif + +#if !defined(CATCH_INTERNAL_CONFIG_EXCEPTIONS_ENABLED) && !defined(CATCH_CONFIG_DISABLE_EXCEPTIONS) +# define CATCH_CONFIG_DISABLE_EXCEPTIONS +#endif + +#if defined(CATCH_INTERNAL_CONFIG_POLYFILL_ISNAN) && !defined(CATCH_CONFIG_NO_POLYFILL_ISNAN) && !defined(CATCH_CONFIG_POLYFILL_ISNAN) +# define CATCH_CONFIG_POLYFILL_ISNAN +#endif + +#if defined(CATCH_INTERNAL_CONFIG_USE_ASYNC) && !defined(CATCH_INTERNAL_CONFIG_NO_ASYNC) && !defined(CATCH_CONFIG_NO_USE_ASYNC) && !defined(CATCH_CONFIG_USE_ASYNC) +# define CATCH_CONFIG_USE_ASYNC +#endif + +#if defined(CATCH_INTERNAL_CONFIG_ANDROID_LOGWRITE) && !defined(CATCH_CONFIG_NO_ANDROID_LOGWRITE) && !defined(CATCH_CONFIG_ANDROID_LOGWRITE) +# define CATCH_CONFIG_ANDROID_LOGWRITE +#endif + +#if defined(CATCH_INTERNAL_CONFIG_GLOBAL_NEXTAFTER) && !defined(CATCH_CONFIG_NO_GLOBAL_NEXTAFTER) && !defined(CATCH_CONFIG_GLOBAL_NEXTAFTER) +# define CATCH_CONFIG_GLOBAL_NEXTAFTER +#endif + +// Even if we do not think the compiler has that warning, we still have +// to provide a macro that can be used by the code. +#if !defined(CATCH_INTERNAL_START_WARNINGS_SUPPRESSION) +# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION +#endif +#if !defined(CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION) +# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION +#endif +#if !defined(CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS +#endif +#if !defined(CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS +#endif +#if !defined(CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS +#endif +#if !defined(CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS +#endif + +// The goal of this macro is to avoid evaluation of the arguments, but +// still have the compiler warn on problems inside... +#if !defined(CATCH_INTERNAL_IGNORE_BUT_WARN) +# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) 
+#endif
+
+#if defined(__APPLE__) && defined(__apple_build_version__) && (__clang_major__ < 10)
+# undef CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS
+#elif defined(__clang__) && (__clang_major__ < 5)
+# undef CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS
+#endif
+
+#if !defined(CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS)
+# define CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS
+#endif
+
+#if defined(CATCH_CONFIG_DISABLE_EXCEPTIONS)
+#define CATCH_TRY if ((true))
+#define CATCH_CATCH_ALL if ((false))
+#define CATCH_CATCH_ANON(type) if ((false))
+#else
+#define CATCH_TRY try
+#define CATCH_CATCH_ALL catch (...)
+#define CATCH_CATCH_ANON(type) catch (type)
+#endif
+
+#if defined(CATCH_INTERNAL_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR) && !defined(CATCH_CONFIG_NO_TRADITIONAL_MSVC_PREPROCESSOR) && !defined(CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR)
+#define CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR
+#endif
+
+// end catch_compiler_capabilities.h
+#define INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) name##line
+#define INTERNAL_CATCH_UNIQUE_NAME_LINE( name, line ) INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line )
+#ifdef CATCH_CONFIG_COUNTER
+# define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __COUNTER__ )
+#else
+# define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __LINE__ )
+#endif
+
+#include <iosfwd>
+#include <string>
+#include <cstddef>
+
+// We need a dummy global operator<< so we can bring it into Catch namespace later
+struct Catch_global_namespace_dummy {};
+std::ostream& operator<<(std::ostream&, Catch_global_namespace_dummy);
+
+namespace Catch {
+
+    struct CaseSensitive { enum Choice {
+        Yes,
+        No
+    }; };
+
+    class NonCopyable {
+        NonCopyable( NonCopyable const& ) = delete;
+        NonCopyable( NonCopyable && ) = delete;
+        NonCopyable& operator = ( NonCopyable const& ) = delete;
+        NonCopyable& operator = ( NonCopyable && ) = delete;
+
+    protected:
+        NonCopyable();
+        virtual ~NonCopyable();
+    };
+
+    struct SourceLineInfo {
+
+        SourceLineInfo() = delete;
+        SourceLineInfo( char const* _file, std::size_t _line ) noexcept
+            : file( _file ),
+              line( _line )
+        {}
+
+        SourceLineInfo( SourceLineInfo const& other ) = default;
+        SourceLineInfo& operator = ( SourceLineInfo const& ) = default;
+        SourceLineInfo( SourceLineInfo&& ) noexcept = default;
+        SourceLineInfo& operator = ( SourceLineInfo&& ) noexcept = default;
+
+        bool empty() const noexcept { return file[0] == '\0'; }
+        bool operator == ( SourceLineInfo const& other ) const noexcept;
+        bool operator < ( SourceLineInfo const& other ) const noexcept;
+
+        char const* file;
+        std::size_t line;
+    };
+
+    std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info );
+
+    // Bring in operator<< from global namespace into Catch namespace
+    // This is necessary because the overload of operator<< above makes
+    // lookup stop at namespace Catch
+    using ::operator<<;
+
+    // Use this in variadic streaming macros to allow
+    //    >> +StreamEndStop
+    // as well as
+    //    >> stuff +StreamEndStop
+    struct StreamEndStop {
+        std::string operator+() const;
+    };
+    template<typename T>
+    T const& operator + ( T const& value, StreamEndStop ) {
+        return value;
+    }
+}
+
+#define CATCH_INTERNAL_LINEINFO \
+    ::Catch::SourceLineInfo( __FILE__, static_cast<std::size_t>( __LINE__ ) )
+
+// end catch_common.h
+namespace Catch {
+
+    struct RegistrarForTagAliases {
+        RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo );
+    };
+
+} // end namespace Catch
+
+#define CATCH_REGISTER_TAG_ALIAS( alias, spec ) \
+    CATCH_INTERNAL_START_WARNINGS_SUPPRESSION \
+    CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \
+    namespace{ Catch::RegistrarForTagAliases INTERNAL_CATCH_UNIQUE_NAME( AutoRegisterTagAlias )( alias, spec, CATCH_INTERNAL_LINEINFO ); } \
+    CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION
+
+// end catch_tag_alias_autoregistrar.h
+// start catch_test_registry.h
+
+// start catch_interfaces_testcase.h
+
+#include <vector>
+
+namespace Catch {
+
+    class TestSpec;
+
+    struct ITestInvoker {
+        virtual void invoke () const = 0;
+        virtual ~ITestInvoker();
+    };
+
+    class TestCase;
+    struct IConfig;
+
+    struct ITestCaseRegistry {
+        virtual ~ITestCaseRegistry();
+        virtual std::vector<TestCase> const& getAllTests() const = 0;
+        virtual std::vector<TestCase> const& getAllTestsSorted( IConfig const& config ) const = 0;
+    };
+
+    bool isThrowSafe( TestCase const& testCase, IConfig const& config );
+    bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config );
+    std::vector<TestCase> filterTests( std::vector<TestCase> const& testCases, TestSpec const& testSpec, IConfig const& config );
+    std::vector<TestCase> const& getAllTestCasesSorted( IConfig const& config );
+
+}
+
+// end catch_interfaces_testcase.h
+// start catch_stringref.h
+
+#include <cstddef>
+#include <string>
+#include <iosfwd>
+#include <cassert>
+
+namespace Catch {
+
+    /// A non-owning string class (similar to the forthcoming std::string_view)
+    /// Note that, because a StringRef may be a substring of another string,
+    /// it may not be null terminated.
+    class StringRef {
+    public:
+        using size_type = std::size_t;
+        using const_iterator = const char*;
+
+    private:
+        static constexpr char const* const s_empty = "";
+
+        char const* m_start = s_empty;
+        size_type m_size = 0;
+
+    public: // construction
+        constexpr StringRef() noexcept = default;
+
+        StringRef( char const* rawChars ) noexcept;
+
+        constexpr StringRef( char const* rawChars, size_type size ) noexcept
+            : m_start( rawChars ),
+              m_size( size )
+        {}
+
+        StringRef( std::string const& stdString ) noexcept
+            : m_start( stdString.c_str() ),
+              m_size( stdString.size() )
+        {}
+
+        explicit operator std::string() const {
+            return std::string(m_start, m_size);
+        }
+
+    public: // operators
+        auto operator == ( StringRef const& other ) const noexcept -> bool;
+        auto operator != (StringRef const& other) const noexcept -> bool {
+            return !(*this == other);
+        }
+
+        auto operator[] ( size_type index ) const noexcept -> char {
+            assert(index < m_size);
+            return m_start[index];
+        }
+
+    public: // named queries
+        constexpr auto empty() const noexcept -> bool {
+            return m_size == 0;
+        }
+        constexpr auto size() const noexcept -> size_type {
+            return m_size;
+        }
+
+        // Returns the current start pointer. If the StringRef is not
+        // null-terminated, throws std::domain_error
+        auto c_str() const -> char const*;
+
+    public: // substrings and searches
+        // Returns a substring of [start, start + length).
+        // If start + length > size(), then the substring is [start, size()).
+        // If start > size(), then the substring is empty.
+        auto substr( size_type start, size_type length ) const noexcept -> StringRef;
+
+        // Returns the current start pointer. May not be null-terminated.
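+        // Unlike c_str(), data() does not require the underlying buffer to
+        // be null-terminated; pair it with size() when handing the bytes on.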
+ auto data() const noexcept -> char const*; + + constexpr auto isNullTerminated() const noexcept -> bool { + return m_start[m_size] == '\0'; + } + + public: // iterators + constexpr const_iterator begin() const { return m_start; } + constexpr const_iterator end() const { return m_start + m_size; } + }; + + auto operator += ( std::string& lhs, StringRef const& sr ) -> std::string&; + auto operator << ( std::ostream& os, StringRef const& sr ) -> std::ostream&; + + constexpr auto operator "" _sr( char const* rawChars, std::size_t size ) noexcept -> StringRef { + return StringRef( rawChars, size ); + } +} // namespace Catch + +constexpr auto operator "" _catch_sr( char const* rawChars, std::size_t size ) noexcept -> Catch::StringRef { + return Catch::StringRef( rawChars, size ); +} + +// end catch_stringref.h +// start catch_preprocessor.hpp + + +#define CATCH_RECURSION_LEVEL0(...) __VA_ARGS__ +#define CATCH_RECURSION_LEVEL1(...) CATCH_RECURSION_LEVEL0(CATCH_RECURSION_LEVEL0(CATCH_RECURSION_LEVEL0(__VA_ARGS__))) +#define CATCH_RECURSION_LEVEL2(...) CATCH_RECURSION_LEVEL1(CATCH_RECURSION_LEVEL1(CATCH_RECURSION_LEVEL1(__VA_ARGS__))) +#define CATCH_RECURSION_LEVEL3(...) CATCH_RECURSION_LEVEL2(CATCH_RECURSION_LEVEL2(CATCH_RECURSION_LEVEL2(__VA_ARGS__))) +#define CATCH_RECURSION_LEVEL4(...) CATCH_RECURSION_LEVEL3(CATCH_RECURSION_LEVEL3(CATCH_RECURSION_LEVEL3(__VA_ARGS__))) +#define CATCH_RECURSION_LEVEL5(...) CATCH_RECURSION_LEVEL4(CATCH_RECURSION_LEVEL4(CATCH_RECURSION_LEVEL4(__VA_ARGS__))) + +#ifdef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR +#define INTERNAL_CATCH_EXPAND_VARGS(...) __VA_ARGS__ +// MSVC needs more evaluations +#define CATCH_RECURSION_LEVEL6(...) CATCH_RECURSION_LEVEL5(CATCH_RECURSION_LEVEL5(CATCH_RECURSION_LEVEL5(__VA_ARGS__))) +#define CATCH_RECURSE(...) CATCH_RECURSION_LEVEL6(CATCH_RECURSION_LEVEL6(__VA_ARGS__)) +#else +#define CATCH_RECURSE(...) CATCH_RECURSION_LEVEL5(__VA_ARGS__) +#endif + +#define CATCH_REC_END(...) +#define CATCH_REC_OUT + +#define CATCH_EMPTY() +#define CATCH_DEFER(id) id CATCH_EMPTY() + +#define CATCH_REC_GET_END2() 0, CATCH_REC_END +#define CATCH_REC_GET_END1(...) CATCH_REC_GET_END2 +#define CATCH_REC_GET_END(...) CATCH_REC_GET_END1 +#define CATCH_REC_NEXT0(test, next, ...) next CATCH_REC_OUT +#define CATCH_REC_NEXT1(test, next) CATCH_DEFER ( CATCH_REC_NEXT0 ) ( test, next, 0) +#define CATCH_REC_NEXT(test, next) CATCH_REC_NEXT1(CATCH_REC_GET_END test, next) + +#define CATCH_REC_LIST0(f, x, peek, ...) , f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1) ) ( f, peek, __VA_ARGS__ ) +#define CATCH_REC_LIST1(f, x, peek, ...) , f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST0) ) ( f, peek, __VA_ARGS__ ) +#define CATCH_REC_LIST2(f, x, peek, ...) f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1) ) ( f, peek, __VA_ARGS__ ) + +#define CATCH_REC_LIST0_UD(f, userdata, x, peek, ...) , f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1_UD) ) ( f, userdata, peek, __VA_ARGS__ ) +#define CATCH_REC_LIST1_UD(f, userdata, x, peek, ...) , f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST0_UD) ) ( f, userdata, peek, __VA_ARGS__ ) +#define CATCH_REC_LIST2_UD(f, userdata, x, peek, ...) f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1_UD) ) ( f, userdata, peek, __VA_ARGS__ ) + +// Applies the function macro `f` to each of the remaining parameters, inserts commas between the results, +// and passes userdata as the first parameter to each invocation, +// e.g. 
CATCH_REC_LIST_UD(f, x, a, b, c) evaluates to f(x, a), f(x, b), f(x, c)
+#define CATCH_REC_LIST_UD(f, userdata, ...) CATCH_RECURSE(CATCH_REC_LIST2_UD(f, userdata, __VA_ARGS__, ()()(), ()()(), ()()(), 0))
+
+#define CATCH_REC_LIST(f, ...) CATCH_RECURSE(CATCH_REC_LIST2(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))
+
+#define INTERNAL_CATCH_EXPAND1(param) INTERNAL_CATCH_EXPAND2(param)
+#define INTERNAL_CATCH_EXPAND2(...) INTERNAL_CATCH_NO## __VA_ARGS__
+#define INTERNAL_CATCH_DEF(...) INTERNAL_CATCH_DEF __VA_ARGS__
+#define INTERNAL_CATCH_NOINTERNAL_CATCH_DEF
+#define INTERNAL_CATCH_STRINGIZE(...) INTERNAL_CATCH_STRINGIZE2(__VA_ARGS__)
+#ifndef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR
+#define INTERNAL_CATCH_STRINGIZE2(...) #__VA_ARGS__
+#define INTERNAL_CATCH_STRINGIZE_WITHOUT_PARENS(param) INTERNAL_CATCH_STRINGIZE(INTERNAL_CATCH_REMOVE_PARENS(param))
+#else
+// MSVC is adding extra space and needs another indirection to expand INTERNAL_CATCH_NOINTERNAL_CATCH_DEF
+#define INTERNAL_CATCH_STRINGIZE2(...) INTERNAL_CATCH_STRINGIZE3(__VA_ARGS__)
+#define INTERNAL_CATCH_STRINGIZE3(...) #__VA_ARGS__
+#define INTERNAL_CATCH_STRINGIZE_WITHOUT_PARENS(param) (INTERNAL_CATCH_STRINGIZE(INTERNAL_CATCH_REMOVE_PARENS(param)) + 1)
+#endif
+
+#define INTERNAL_CATCH_MAKE_NAMESPACE2(...) ns_##__VA_ARGS__
+#define INTERNAL_CATCH_MAKE_NAMESPACE(name) INTERNAL_CATCH_MAKE_NAMESPACE2(name)
+
+#define INTERNAL_CATCH_REMOVE_PARENS(...) INTERNAL_CATCH_EXPAND1(INTERNAL_CATCH_DEF __VA_ARGS__)
+
+#ifndef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR
+#define INTERNAL_CATCH_MAKE_TYPE_LIST2(...) decltype(get_wrapper<__VA_ARGS__>())
+#define INTERNAL_CATCH_MAKE_TYPE_LIST(...) INTERNAL_CATCH_MAKE_TYPE_LIST2(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__))
+#else
+#define INTERNAL_CATCH_MAKE_TYPE_LIST2(...) INTERNAL_CATCH_EXPAND_VARGS(decltype(get_wrapper<__VA_ARGS__>()))
+#define INTERNAL_CATCH_MAKE_TYPE_LIST(...) INTERNAL_CATCH_EXPAND_VARGS(INTERNAL_CATCH_MAKE_TYPE_LIST2(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__)))
+#endif
+
+#define INTERNAL_CATCH_MAKE_TYPE_LISTS_FROM_TYPES(...)\
+    CATCH_REC_LIST(INTERNAL_CATCH_MAKE_TYPE_LIST,__VA_ARGS__)
+
+#define INTERNAL_CATCH_REMOVE_PARENS_1_ARG(_0) INTERNAL_CATCH_REMOVE_PARENS(_0)
+#define INTERNAL_CATCH_REMOVE_PARENS_2_ARG(_0, _1) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_1_ARG(_1)
+#define INTERNAL_CATCH_REMOVE_PARENS_3_ARG(_0, _1, _2) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_2_ARG(_1, _2)
+#define INTERNAL_CATCH_REMOVE_PARENS_4_ARG(_0, _1, _2, _3) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_3_ARG(_1, _2, _3)
+#define INTERNAL_CATCH_REMOVE_PARENS_5_ARG(_0, _1, _2, _3, _4) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_4_ARG(_1, _2, _3, _4)
+#define INTERNAL_CATCH_REMOVE_PARENS_6_ARG(_0, _1, _2, _3, _4, _5) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_5_ARG(_1, _2, _3, _4, _5)
+#define INTERNAL_CATCH_REMOVE_PARENS_7_ARG(_0, _1, _2, _3, _4, _5, _6) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_6_ARG(_1, _2, _3, _4, _5, _6)
+#define INTERNAL_CATCH_REMOVE_PARENS_8_ARG(_0, _1, _2, _3, _4, _5, _6, _7) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_7_ARG(_1, _2, _3, _4, _5, _6, _7)
+#define INTERNAL_CATCH_REMOVE_PARENS_9_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_8_ARG(_1, _2, _3, _4, _5, _6, _7, _8)
+#define INTERNAL_CATCH_REMOVE_PARENS_10_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_9_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9)
+#define INTERNAL_CATCH_REMOVE_PARENS_11_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_10_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10)
+
+#define INTERNAL_CATCH_VA_NARGS_IMPL(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
+
+#define INTERNAL_CATCH_TYPE_GEN\
+    template<typename...> struct TypeList {};\
+    template<typename...Ts>\
+    constexpr auto get_wrapper() noexcept -> TypeList<Ts...> { return {}; }\
+    template<template<typename...> class...> struct TemplateTypeList{};\
+    template<template<typename...> class...Cs>\
+    constexpr auto get_wrapper() noexcept -> TemplateTypeList<Cs...> { return {}; }\
+    template<typename...>\
+    struct append;\
+    template<typename...>\
+    struct rewrap;\
+    template<template<typename...> class, typename...>\
+    struct create;\
+    template<template<typename...> class, typename>\
+    struct convert;\
+    \
+    template<typename T> \
+    struct append<T> { using type = T; };\
+    template< template<typename...> class L1, typename...E1, template<typename...> class L2, typename...E2, typename...Rest>\
+    struct append<L1<E1...>, L2<E2...>, Rest...> { using type = typename append<L1<E1...,E2...>, Rest...>::type; };\
+    template< template<typename...> class L1, typename...E1, typename...Rest>\
+    struct append<L1<E1...>, TypeList<>, Rest...> { using type = L1<E1...>; };\
+    \
+    template< template<typename...> class Container, template<typename...> class List, typename...elems>\
+    struct rewrap<TemplateTypeList<Container>, List<elems...>> { using type = TypeList<Container<elems...>>; };\
+    template< template<typename...> class Container, template<typename...> class List, class...Elems, typename...Elements>\
+    struct rewrap<TemplateTypeList<Container>, List<Elems...>, Elements...> { using type = typename append<TypeList<Container<Elems...>>, typename rewrap<TemplateTypeList<Container>, Elements...>::type>::type; };\
+    \
+    template