Add .clang-format file
Using clang-format version 12.0.0
parent 51b155df92
commit 53493b2024
315 changed files with 3178 additions and 2660 deletions
.clang-format (new file, 218 lines)
@@ -0,0 +1,218 @@
---
Language: Cpp
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignConsecutiveMacros: None
AlignEscapedNewlines: Right
AlignOperands: AlignAfterOperator
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
AttributeMacros:
  - __capability
BinPackArguments: true
BinPackParameters: false
BitFieldColonSpacing: Both
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Custom
BreakBeforeConceptDeclarations: true
BreakBeforeTernaryOperators: true
BreakBeforeInheritanceComma: false
BreakConstructorInitializersBeforeComma: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
# EmptyLineAfterAccessModifier: Leave
EmptyLineBeforeAccessModifier: Always
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<mach/'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
  - Regex: '^<windows.h>'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
  - Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
  - Regex: '^<([^\.])*>$'
    Priority: 2
    SortPriority: 0
    CaseSensitive: false
  - Regex: '^<.*\.'
    Priority: 3
    SortPriority: 0
    CaseSensitive: false
  - Regex: '.*'
    Priority: 4
    SortPriority: 0
    CaseSensitive: false
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
# IndentAccessModifiers: false
IndentCaseBlocks: false
IndentCaseLabels: false
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentRequires: false
IndentWidth: 4
IndentWrappedFunctionNames: false
# InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
NamespaceMacros:
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
    BasedOnStyle: google
  - Language: TextProto
    Delimiters:
      - pb
      - PB
      - proto
      - PROTO
    EnclosingFunctions:
      - EqualsProto
      - EquivToProto
      - PARSE_PARTIAL_TEXT_PROTO
      - PARSE_TEST_PROTO
      - PARSE_TEXT_PROTO
      - ParseTextOrDie
      - ParseTextProtoOrDie
      - ParseTestProto
      - ParsePartialTestProto
    CanonicalDelimiter: ''
    BasedOnStyle: google
ReflowComments: true
# ShortNamespaceLines: 5
SortIncludes: true
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceAroundPointerQualifiers: Default
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: false
# SpacesInLineCommentPrefix: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Latest
StatementAttributeLikeMacros:
  - Q_EMIT
StatementMacros:
  - Q_UNUSED
  - QT_REQUIRE_VERSION
TabWidth: 4
TypenameMacros:
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
  - STRINGIZE
  - PP_STRINGIZE
  - BOOST_PP_STRINGIZE
  - NS_SWIFT_NAME
  - CF_SWIFT_NAME
  - FCODE
  - ICODE
...
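Illustration (not part of the commit): a minimal hypothetical C++ sketch of the shape this configuration yields once lines are wrapped — 4-space indents, left-aligned pointers, constructor initializers broken before the comma with an 8-column indent, binary operators leading continuation lines, and two spaces before trailing comments. All names below are invented. Running `clang-format -i -style=file <file>` with clang-format 12.0.0 against this file should reproduce the reformatting seen in the hunks that follow.

    #include <cstddef>

    class Example {
    public:
        // BreakConstructorInitializers: BeforeComma, ConstructorInitializerIndentWidth: 8
        Example(int* data, std::size_t size)
                : data_(data)
                , size_(size) {}

        // BreakBeforeBinaryOperators: All — the operator starts the wrapped line
        bool InBounds(std::size_t a, std::size_t b) const {
            return a < size_
                && b < size_;
        }

    private:
        int* data_;         // PointerAlignment: Left
        std::size_t size_;  // SpacesBeforeTrailingComments: 2
    };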
dynarmic/backend/x64/a32_emit_x64.cpp

@@ -3,6 +3,8 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/backend/x64/a32_emit_x64.h"
+
 #include <algorithm>
 #include <optional>
 #include <utility>
@@ -11,7 +13,6 @@
 #include <fmt/ostream.h>
 #include <mp/traits/integer_of_size.h>
 
-#include "dynarmic/backend/x64/a32_emit_x64.h"
 #include "dynarmic/backend/x64/a32_jitstate.h"
 #include "dynarmic/backend/x64/abi.h"
 #include "dynarmic/backend/x64/block_of_code.h"
@@ -61,7 +62,7 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
 }
 
 A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
-    : EmitContext(reg_alloc, block), conf(conf) {}
+        : EmitContext(reg_alloc, block), conf(conf) {}
 
 A32::LocationDescriptor A32EmitContext::Location() const {
     return A32::LocationDescriptor{block.Location()};
@@ -87,7 +88,7 @@ A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_in
     code.PreludeComplete();
     ClearFastDispatchTable();
 
-    exception_handler.SetFastmemCallback([this](u64 rip_){
+    exception_handler.SetFastmemCallback([this](u64 rip_) {
         return FastmemCallback(rip_);
     });
 }
@@ -98,7 +99,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
     code.EnableWriting();
     SCOPE_EXIT { code.DisableWriting(); };
 
-    static const std::vector<HostLoc> gpr_order = [this]{
+    static const std::vector<HostLoc> gpr_order = [this] {
         std::vector<HostLoc> gprs{any_gpr};
         if (conf.page_table) {
             gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
@@ -126,15 +127,14 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
 
         // Call the relevant Emit* member function.
         switch (inst->GetOpcode()) {
-#define OPCODE(name, type, ...)                 \
-    case IR::Opcode::name:                      \
-        A32EmitX64::Emit##name(ctx, inst);      \
-        break;
-#define A32OPC(name, type, ...)                 \
-    case IR::Opcode::A32##name:                 \
-        A32EmitX64::EmitA32##name(ctx, inst);   \
-        break;
-
+#define OPCODE(name, type, ...)               \
+    case IR::Opcode::name:                    \
+        A32EmitX64::Emit##name(ctx, inst);    \
+        break;
+#define A32OPC(name, type, ...)               \
+    case IR::Opcode::A32##name:               \
+        A32EmitX64::EmitA32##name(ctx, inst); \
+        break;
 #define A64OPC(...)
 #include "dynarmic/ir/opcodes.inc"
 #undef OPCODE
@@ -216,7 +216,7 @@ void A32EmitX64::GenFastmemFallbacks() {
         for (int value_idx : idxes) {
             for (const auto& [bitsize, callback] : read_callbacks) {
                 code.align();
-                read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+                read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
                 ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
                 if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
                     code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@@ -232,7 +232,7 @@ void A32EmitX64::GenFastmemFallbacks() {
 
             for (const auto& [bitsize, callback] : write_callbacks) {
                 code.align();
-                write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+                write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
                 ABI_PushCallerSaveRegistersAndAdjustStack(code);
                 if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
                     code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
@@ -310,7 +310,7 @@ void A32EmitX64::GenTerminalHandlers() {
         PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
 
         code.align();
-        fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>();
+        fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
         code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
         if (code.HasHostFeature(HostFeature::SSE42)) {
            code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32());
@@ -728,7 +728,7 @@ void A32EmitX64::EmitA32DataMemoryBarrier(A32EmitContext&, IR::Inst*) {
 
 void A32EmitX64::EmitA32InstructionSynchronizationBarrier(A32EmitContext& ctx, IR::Inst*) {
     if (!conf.hook_isb) {
         return;
     }
 
     ctx.reg_alloc.HostCall(nullptr);
@@ -766,7 +766,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
     code.mov(mask, new_pc);
     code.and_(mask, 1);
     code.lea(new_upper, ptr[mask.cvt64() + upper_without_t]);
-    code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
+    code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]);  // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
     code.and_(new_pc, mask);
     code.mov(MJitStateReg(A32::Reg::PC), new_pc);
     code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
@@ -1021,7 +1021,7 @@ void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbya
     }
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 template<std::size_t bitsize, auto callback>
 void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
@@ -1048,8 +1048,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
                 Common::BitCast<u64>(code.getCurr()),
                 Common::BitCast<u64>(wrapped_fn),
                 *marker,
-            }
-        );
+            });
 
         ctx.reg_alloc.DefineValue(inst, value);
         return;
@@ -1095,8 +1094,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
                 Common::BitCast<u64>(code.getCurr()),
                 Common::BitCast<u64>(wrapped_fn),
                 *marker,
-            }
-        );
+            });
 
         return;
     }
@@ -1146,7 +1144,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
     WriteMemory<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst);
 }
 
-template <size_t bitsize, auto callback>
+template<size_t bitsize, auto callback>
 void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
     using T = mp::unsigned_integer_of_size<bitsize>;
 
@@ -1162,11 +1160,10 @@ void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
             return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
                 return (conf.callbacks->*callback)(vaddr);
             });
-        }
-    );
+        });
 }
 
-template <size_t bitsize, auto callback>
+template<size_t bitsize, auto callback>
 void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
     using T = mp::unsigned_integer_of_size<bitsize>;
 
@@ -1185,11 +1182,12 @@ void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
     code.CallLambda(
         [](A32::UserConfig& conf, u32 vaddr, T value) -> u32 {
             return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
                 [&](T expected) -> bool {
                     return (conf.callbacks->*callback)(vaddr, value, expected);
-                }) ? 0 : 1;
-        }
-    );
+                })
+                ? 0
+                : 1;
+        });
     code.L(end);
 }
 
@@ -1229,10 +1227,7 @@ static void EmitCoprocessorException() {
     ASSERT_FALSE("Should raise coproc exception here");
 }
 
-static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface,
-                               A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr,
-                               std::optional<Argument::copyable_reference> arg0 = {},
-                               std::optional<Argument::copyable_reference> arg1 = {}) {
+static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
     reg_alloc.HostCall(inst, {}, {}, arg0, arg1);
 
     code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(jit_interface));
@@ -1519,7 +1514,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc
     code.mov(MJitStateReg(A32::Reg::PC), code.ABI_PARAM2.cvt32());
     code.SwitchMxcsrOnExit();
     Devirtualize<&A32::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code);
-    code.ReturnFromRunCode(true); // TODO: Check cycles
+    code.ReturnFromRunCode(true);  // TODO: Check cycles
 }
 
 void A32EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
@@ -1532,7 +1527,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
     };
 
     const u32 old_upper = get_upper(old_location);
-    const u32 new_upper = [&]{
+    const u32 new_upper = [&] {
         const u32 mask = ~u32(conf.always_little_endian ? 0x2 : 0);
         return get_upper(new_location) & mask;
     }();
@@ -1666,4 +1661,4 @@ void A32EmitX64::Unpatch(const IR::LocationDescriptor& location) {
     }
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
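A recurring mechanical change above is `code.getCurr<void(*)()>()` becoming `code.getCurr<void (*)()>()`: clang-format 12 writes function-pointer types with a space between the return type and the parenthesized declarator. A minimal standalone illustration (hypothetical names, not dynarmic code):

    #include <cstddef>
    #include <map>
    #include <tuple>

    // clang-format 12 normalizes "void(*)()" to "void (*)()" in type names:
    std::map<std::tuple<std::size_t, int, int>, void (*)()> fallbacks;

    // The same spacing applies when the pointee returns a reference:
    struct Entry {};
    using LookupFn = Entry& (*)(unsigned long long);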
dynarmic/backend/x64/a32_emit_x64.h

@@ -71,8 +71,8 @@ protected:
     std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
     void ClearFastDispatchTable();
 
-    std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks;
-    std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks;
+    std::map<std::tuple<size_t, int, int>, void (*)()> read_fallbacks;
+    std::map<std::tuple<size_t, int, int>, void (*)()> write_fallbacks;
     void GenFastmemFallbacks();
 
     const void* terminal_handler_pop_rsb_hint;
@@ -133,4 +133,4 @@ protected:
     void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
 };
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
dynarmic/backend/x64/a32_interface.cpp

@@ -55,8 +55,7 @@ struct Jit::Impl {
         : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
         , emitter(block_of_code, conf, jit)
         , conf(std::move(conf))
-        , jit_interface(jit)
-    {}
+        , jit_interface(jit) {}
 
     A32JitState jit_state;
     BlockOfCode block_of_code;
@@ -70,7 +69,7 @@ struct Jit::Impl {
     bool invalidate_entire_cache = false;
 
     void Execute() {
-        const CodePtr current_codeptr = [this]{
+        const CodePtr current_codeptr = [this] {
             // RSB optimization
             const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
             if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
@@ -176,7 +175,8 @@ private:
     }
 };
 
-Jit::Jit(UserConfig conf) : impl(std::make_unique<Impl>(this, std::move(conf))) {}
+Jit::Jit(UserConfig conf)
+        : impl(std::make_unique<Impl>(this, std::move(conf))) {}
 
 Jit::~Jit() = default;
 
@@ -269,10 +269,15 @@ struct Context::Impl {
     size_t invalid_cache_generation;
 };
 
-Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
+Context::Context()
+        : impl(std::make_unique<Context::Impl>()) {
+    impl->jit_state.ResetRSB();
+}
 Context::~Context() = default;
-Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
-Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
+Context::Context(const Context& ctx)
+        : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
+Context::Context(Context&& ctx) noexcept
+        : impl(std::move(ctx.impl)) {}
 Context& Context::operator=(const Context& ctx) {
     *impl = *ctx.impl;
     return *this;
@@ -323,4 +328,4 @@ std::string Jit::Disassemble() const {
     return Common::DisassembleX64(impl->block_of_code.GetCodeBegin(), impl->block_of_code.getCurr());
 }
 
-} // namespace Dynarmic::A32
+}  // namespace Dynarmic::A32
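The constructor rewrites above follow from BreakConstructorInitializers: BeforeComma with ConstructorInitializerIndentWidth: 8: definitions that carried the initializer list and body on one line are split apart. A hypothetical standalone sketch (invented types, same shape):

    #include <memory>

    struct Impl {
        void Reset() {}
    };

    struct Widget {
        Widget();
        std::unique_ptr<Impl> impl;
    };

    // Before:  Widget::Widget() : impl(std::make_unique<Impl>()) { impl->Reset(); }
    // After:
    Widget::Widget()
            : impl(std::make_unique<Impl>()) {
        impl->Reset();
    }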
dynarmic/backend/x64/a32_jitstate.cpp

@@ -4,6 +4,7 @@
  */
 
 #include "dynarmic/backend/x64/a32_jitstate.h"
+
 #include "dynarmic/backend/x64/block_of_code.h"
 #include "dynarmic/backend/x64/nzcv_util.h"
 #include "dynarmic/common/assert.h"
@@ -89,7 +90,7 @@ void A32JitState::SetCpsr(u32 cpsr) {
     upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0;
     upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0;
     // IT state
     upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
     upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;
 
     // Other flags
@@ -188,7 +189,7 @@ void A32JitState::SetFpscr(u32 FPSCR) {
     asimd_MXCSR = 0x00009fc0;
 
     // RMode
-    const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
+    const std::array<u32, 4> MXCSR_RMode{0x0, 0x4000, 0x2000, 0x6000};
     guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
 
     // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
@@ -196,9 +197,9 @@ void A32JitState::SetFpscr(u32 FPSCR) {
 
     if (Common::Bit<24>(FPSCR)) {
         // VFP Flush to Zero
-        guest_MXCSR |= (1 << 15); // SSE Flush to Zero
-        guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
+        guest_MXCSR |= (1 << 15);  // SSE Flush to Zero
+        guest_MXCSR |= (1 << 6);   // SSE Denormals are Zero
     }
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
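The `MXCSR_RMode {…}` → `MXCSR_RMode{…}` change above is SpaceBeforeCpp11BracedList: false (together with Cpp11BracedListStyle: true) removing the space before a braced initializer. A standalone sketch using the same values:

    #include <array>

    // No space between the variable name and its braced initializer:
    const std::array<unsigned, 4> MXCSR_RMode{0x0, 0x4000, 0x2000, 0x6000};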
dynarmic/backend/x64/a32_jitstate.h

@@ -16,8 +16,8 @@ namespace Dynarmic::Backend::X64 {
 class BlockOfCode;
 
 #ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4324) // Structure was padded due to alignment specifier
+#    pragma warning(push)
+#    pragma warning(disable : 4324)  // Structure was padded due to alignment specifier
 #endif
 
 struct A32JitState {
@@ -25,7 +25,7 @@ struct A32JitState {
 
     A32JitState() { ResetRSB(); }
 
-    std::array<u32, 16> Reg{}; // Current register file.
+    std::array<u32, 16> Reg{};  // Current register file.
     // TODO: Mode-specific register sets unimplemented.
 
     u32 upper_location_descriptor = 0;
@@ -37,7 +37,7 @@ struct A32JitState {
     u32 Cpsr() const;
     void SetCpsr(u32 cpsr);
 
-    alignas(16) std::array<u32, 64> ExtReg{}; // Extension registers.
+    alignas(16) std::array<u32, 64> ExtReg{};  // Extension registers.
 
     // For internal use (See: BlockOfCode::RunCode)
     u32 guest_MXCSR = 0x00001f80;
@@ -47,7 +47,7 @@ struct A32JitState {
     // Exclusive state
     u32 exclusive_state = 0;
 
-    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
+    static constexpr size_t RSBSize = 8;  // MUST be a power of 2.
     static constexpr size_t RSBPtrMask = RSBSize - 1;
     u32 rsb_ptr = 0;
     std::array<u64, RSBSize> rsb_location_descriptors;
@@ -55,7 +55,7 @@ struct A32JitState {
     void ResetRSB();
 
     u32 fpsr_exc = 0;
-    u32 fpsr_qc = 0; // Dummy value
+    u32 fpsr_qc = 0;  // Dummy value
     u32 fpsr_nzcv = 0;
     u32 Fpscr() const;
     void SetFpscr(u32 FPSCR);
@@ -91,9 +91,9 @@ struct A32JitState {
 };
 
 #ifdef _MSC_VER
-#pragma warning(pop)
+#    pragma warning(pop)
 #endif
 
 using CodePtr = const void*;
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
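The `#pragma` → `#    pragma` changes above come from IndentPPDirectives: AfterHash: directives nested inside a conditional block are indented after the `#` by one IndentWidth step per nesting level. A standalone sketch (invented struct, same pattern as these headers):

    // One nesting level inside #ifdef gets one indent step after the hash:
    #ifdef _MSC_VER
    #    pragma warning(push)
    #    pragma warning(disable : 4324)  // padding due to alignment specifier
    #endif

    struct alignas(16) Padded {
        char c;
    };

    #ifdef _MSC_VER
    #    pragma warning(pop)
    #endif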
dynarmic/backend/x64/a64_emit_x64.cpp

@@ -3,13 +3,14 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/backend/x64/a64_emit_x64.h"
+
 #include <initializer_list>
 
 #include <fmt/format.h>
 #include <fmt/ostream.h>
 #include <mp/traits/integer_of_size.h>
 
-#include "dynarmic/backend/x64/a64_emit_x64.h"
 #include "dynarmic/backend/x64/a64_jitstate.h"
 #include "dynarmic/backend/x64/abi.h"
 #include "dynarmic/backend/x64/block_of_code.h"
@@ -38,7 +39,7 @@ namespace Dynarmic::Backend::X64 {
 using namespace Xbyak::util;
 
 A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
-    : EmitContext(reg_alloc, block), conf(conf) {}
+        : EmitContext(reg_alloc, block), conf(conf) {}
 
 A64::LocationDescriptor A64EmitContext::Location() const {
     return A64::LocationDescriptor{block.Location()};
@@ -67,7 +68,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
     code.EnableWriting();
     SCOPE_EXIT { code.DisableWriting(); };
 
-    static const std::vector<HostLoc> gpr_order = [this]{
+    static const std::vector<HostLoc> gpr_order = [this] {
         std::vector<HostLoc> gprs{any_gpr};
         if (conf.page_table) {
             gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
@@ -92,16 +93,15 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
 
         // Call the relevant Emit* member function.
         switch (inst->GetOpcode()) {
-#define OPCODE(name, type, ...)                 \
-    case IR::Opcode::name:                      \
-        A64EmitX64::Emit##name(ctx, inst);      \
-        break;
-
+#define OPCODE(name, type, ...)               \
+    case IR::Opcode::name:                    \
+        A64EmitX64::Emit##name(ctx, inst);    \
+        break;
 #define A32OPC(...)
-#define A64OPC(name, type, ...)                 \
-    case IR::Opcode::A64##name:                 \
-        A64EmitX64::EmitA64##name(ctx, inst);   \
-        break;
+#define A64OPC(name, type, ...)               \
+    case IR::Opcode::A64##name:               \
+        A64EmitX64::EmitA64##name(ctx, inst); \
+        break;
 #include "dynarmic/ir/opcodes.inc"
 #undef OPCODE
 #undef A32OPC
@@ -150,14 +150,13 @@ void A64EmitX64::ClearFastDispatchTable() {
 
 void A64EmitX64::GenMemory128Accessors() {
     code.align();
-    memory_read_128 = code.getCurr<void(*)()>();
+    memory_read_128 = code.getCurr<void (*)()>();
 #ifdef _WIN32
-    Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCallWithReturnPointer(code,
-        [&](Xbyak::Reg64 return_value_ptr, [[maybe_unused]] RegList args) {
-            code.mov(code.ABI_PARAM3, code.ABI_PARAM2);
-            code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
-            code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]);
-        });
+    Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCallWithReturnPointer(code, [&](Xbyak::Reg64 return_value_ptr, [[maybe_unused]] RegList args) {
+        code.mov(code.ABI_PARAM3, code.ABI_PARAM2);
+        code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
+        code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]);
+    });
     code.movups(xmm1, xword[code.ABI_RETURN]);
     code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE);
 #else
@@ -177,7 +176,7 @@ void A64EmitX64::GenMemory128Accessors() {
     PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128");
 
     code.align();
-    memory_write_128 = code.getCurr<void(*)()>();
+    memory_write_128 = code.getCurr<void (*)()>();
 #ifdef _WIN32
     code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
@@ -223,7 +222,7 @@ void A64EmitX64::GenFastmemFallbacks() {
 
         for (int value_idx : idxes) {
             code.align();
-            read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+            read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
             ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
             if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
                 code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@@ -237,7 +236,7 @@ void A64EmitX64::GenFastmemFallbacks() {
             PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128");
 
             code.align();
-            write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+            write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
             ABI_PushCallerSaveRegistersAndAdjustStack(code);
             if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
                 code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@@ -256,7 +255,7 @@ void A64EmitX64::GenFastmemFallbacks() {
 
             for (const auto& [bitsize, callback] : read_callbacks) {
                 code.align();
-                read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+                read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
                 ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
                 if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
                     code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@@ -272,7 +271,7 @@ void A64EmitX64::GenFastmemFallbacks() {
 
             for (const auto& [bitsize, callback] : write_callbacks) {
                 code.align();
-                write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+                write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
                 ABI_PushCallerSaveRegistersAndAdjustStack(code);
                 if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
                     code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
@@ -353,7 +352,7 @@ void A64EmitX64::GenTerminalHandlers() {
         PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint");
 
         code.align();
-        fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>();
+        fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
         code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
         if (code.HasHostFeature(HostFeature::SSE42)) {
             code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
@@ -542,7 +541,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
     const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
-    code.movq(to_store, to_store); // TODO: Remove when able
+    code.movq(to_store, to_store);  // TODO: Remove when able
     code.movaps(addr, to_store);
 }
 
@@ -628,10 +627,9 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ASSERT(args[0].IsImmediate());
     const u32 imm = args[0].GetImmediateU32();
-    Devirtualize<&A64::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code,
-        [&](RegList param) {
-            code.mov(param[0], imm);
-        });
+    Devirtualize<&A64::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code, [&](RegList param) {
+        code.mov(param[0], imm);
+    });
 
     // The kernel would have to execute ERET to get here, which would clear exclusive state.
     code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
 }
@@ -642,11 +640,10 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
     ASSERT(args[0].IsImmediate() && args[1].IsImmediate());
     const u64 pc = args[0].GetImmediateU64();
     const u64 exception = args[1].GetImmediateU64();
-    Devirtualize<&A64::UserCallbacks::ExceptionRaised>(conf.callbacks).EmitCall(code,
-        [&](RegList param) {
-            code.mov(param[0], pc);
-            code.mov(param[1], exception);
-        });
+    Devirtualize<&A64::UserCallbacks::ExceptionRaised>(conf.callbacks).EmitCall(code, [&](RegList param) {
+        code.mov(param[0], pc);
+        code.mov(param[1], exception);
+    });
 }
 
 void A64EmitX64::EmitA64DataCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) {
@@ -881,7 +878,7 @@ void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbya
     }
 }
 
-} // anonymous namepsace
+}  // namespace
 
 template<std::size_t bitsize>
 void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
@@ -1090,8 +1087,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
                 return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
                     return (conf.callbacks->*callback)(vaddr);
                 });
-            }
-        );
+            });
     } else {
         const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
         ctx.reg_alloc.Use(args[0], ABI_PARAM2);
@@ -1107,8 +1103,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
                 ret = conf.global_monitor->ReadAndMark<A64::Vector>(conf.processor_id, vaddr, [&]() -> A64::Vector {
                     return (conf.callbacks->*callback)(vaddr);
                 });
-            }
-        );
+            });
 
         code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
         ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
@@ -1163,11 +1158,12 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
         code.CallLambda(
             [](A64::UserConfig& conf, u64 vaddr, T value) -> u32 {
                 return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
                     [&](T expected) -> bool {
                         return (conf.callbacks->*callback)(vaddr, value, expected);
-                    }) ? 0 : 1;
-            }
-        );
+                    })
+                    ? 0
+                    : 1;
+            });
     } else {
         ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
         code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
@@ -1175,11 +1171,12 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
         code.CallLambda(
             [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 {
                 return conf.global_monitor->DoExclusiveOperation<A64::Vector>(conf.processor_id, vaddr,
                     [&](A64::Vector expected) -> bool {
                         return (conf.callbacks->*callback)(vaddr, value, expected);
-                    }) ? 0 : 1;
-            }
-        );
+                    })
+                    ? 0
+                    : 1;
+            });
         ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
     }
     code.L(end);
@@ -1214,13 +1211,12 @@ std::string A64EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescr
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor, bool) {
     code.SwitchMxcsrOnExit();
-    Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code,
-        [&](RegList param) {
-            code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
-            code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]);
-            code.mov(param[1].cvt32(), terminal.num_instructions);
-        });
-    code.ReturnFromRunCode(true); // TODO: Check cycles
+    Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) {
+        code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
+        code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]);
+        code.mov(param[1].cvt32(), terminal.num_instructions);
+    });
+    code.ReturnFromRunCode(true);  // TODO: Check cycles
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
@@ -1352,4 +1348,4 @@ void A64EmitX64::Unpatch(const IR::LocationDescriptor& location) {
     }
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
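The `}) ? 0 : 1;` rewrites above show BreakBeforeTernaryOperators: true at work: when a conditional expression is already wrapped, `?` and `:` each begin their own continuation line. A hypothetical standalone sketch (names invented, not the dynarmic API):

    template<typename F>
    bool DoExclusiveOperation(unsigned long long vaddr, F f) {
        return f(static_cast<int>(vaddr));
    }

    unsigned StatusOf(unsigned long long vaddr) {
        // '?' and ':' lead their own lines once the call wraps:
        return DoExclusiveOperation(vaddr, [](int expected) { return expected == 0; })
                   ? 0
                   : 1;
    }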
dynarmic/backend/x64/a64_emit_x64.h

@@ -69,8 +69,8 @@ protected:
     void (*memory_write_128)();
     void GenMemory128Accessors();
 
-    std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks;
-    std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks;
+    std::map<std::tuple<size_t, int, int>, void (*)()> read_fallbacks;
+    std::map<std::tuple<size_t, int, int>, void (*)()> write_fallbacks;
     void GenFastmemFallbacks();
 
     const void* terminal_handler_pop_rsb_hint;
@@ -118,4 +118,4 @@ protected:
     void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
 };
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
dynarmic/backend/x64/a64_interface.cpp

@@ -44,10 +44,9 @@ static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
 struct Jit::Impl final {
 public:
     Impl(Jit* jit, UserConfig conf)
         : conf(conf)
         , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
-        , emitter(block_of_code, conf, jit)
-    {
+        , emitter(block_of_code, conf, jit) {
         ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
     }
 
@@ -61,7 +60,7 @@ public:
 
     // TODO: Check code alignment
 
-    const CodePtr current_code_ptr = [this]{
+    const CodePtr current_code_ptr = [this] {
         // RSB optimization
         const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
         if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
@@ -233,7 +232,7 @@ private:
         // JIT Compile
         const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
         IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
-            {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
+                                            {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
         Optimization::A64CallbackConfigPass(ir_block, conf);
         if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
             Optimization::A64GetSetElimination(ir_block);
@@ -287,7 +286,7 @@ private:
 };
 
 Jit::Jit(UserConfig conf)
-    : impl(std::make_unique<Jit::Impl>(this, conf)) {}
+        : impl(std::make_unique<Jit::Impl>(this, conf)) {}
 
 Jit::~Jit() = default;
 
@@ -399,4 +398,4 @@ std::string Jit::Disassemble() const {
     return impl->Disassemble();
 }
 
-} // namespace Dynarmic::A64
+}  // namespace Dynarmic::A64
dynarmic/backend/x64/a64_jitstate.cpp

@@ -4,6 +4,7 @@
  */
 
 #include "dynarmic/backend/x64/a64_jitstate.h"
+
 #include "dynarmic/common/bit_util.h"
 #include "dynarmic/frontend/A64/location_descriptor.h"
 
@@ -58,15 +59,15 @@ void A64JitState::SetFpcr(u32 value) {
     asimd_MXCSR &= 0x0000003D;
     guest_MXCSR &= 0x0000003D;
     asimd_MXCSR |= 0x00001f80;
-    guest_MXCSR |= 0x00001f80; // Mask all exceptions
+    guest_MXCSR |= 0x00001f80;  // Mask all exceptions
 
     // RMode
-    const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
+    const std::array<u32, 4> MXCSR_RMode{0x0, 0x4000, 0x2000, 0x6000};
     guest_MXCSR |= MXCSR_RMode[(value >> 22) & 0x3];
 
     if (Common::Bit<24>(value)) {
-        guest_MXCSR |= (1 << 15); // SSE Flush to Zero
-        guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
+        guest_MXCSR |= (1 << 15);  // SSE Flush to Zero
+        guest_MXCSR |= (1 << 6);   // SSE Denormals are Zero
     }
 }
 
@@ -111,4 +112,4 @@ void A64JitState::SetFpsr(u32 value) {
     fpsr_exc = value & 0x9F;
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
dynarmic/backend/x64/a64_jitstate.h

@@ -18,8 +18,8 @@ namespace Dynarmic::Backend::X64 {
 class BlockOfCode;
 
 #ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4324) // Structure was padded due to alignment specifier
+#    pragma warning(push)
+#    pragma warning(disable : 4324)  // Structure was padded due to alignment specifier
 #endif
 
 struct A64JitState {
@@ -40,7 +40,7 @@ struct A64JitState {
         cpsr_nzcv = NZCV::ToX64(new_pstate);
     }
 
-    alignas(16) std::array<u64, 64> vec{}; // Extension registers.
+    alignas(16) std::array<u64, 64> vec{};  // Extension registers.
 
     // For internal use (See: BlockOfCode::RunCode)
     u32 guest_MXCSR = 0x00001f80;
@@ -51,7 +51,7 @@ struct A64JitState {
     static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;
     u8 exclusive_state = 0;
 
-    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
+    static constexpr size_t RSBSize = 8;  // MUST be a power of 2.
     static constexpr size_t RSBPtrMask = RSBSize - 1;
     u32 rsb_ptr = 0;
     std::array<u64, RSBSize> rsb_location_descriptors;
@@ -77,9 +77,9 @@ struct A64JitState {
 };
 
 #ifdef _MSC_VER
-#pragma warning(pop)
+#    pragma warning(pop)
 #endif
 
 using CodePtr = const void*;
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
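Many of the header hunks above change only comment spacing: SpacesBeforeTrailingComments: 2 puts at least two spaces before a trailing `//`, and AlignTrailingComments: true lines up runs of them. A small invented example in the same shape as these jit-state structs:

    struct Flags {
        unsigned fpsr_exc = 0;   // cumulative exception bits
        unsigned fpsr_qc = 0;    // saturation flag (dummy value)
        unsigned fpsr_nzcv = 0;  // condition flags
    };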
@@ -3,12 +3,13 @@
 * SPDX-License-Identifier: 0BSD
 */

+#include "dynarmic/backend/x64/abi.h"
+
 #include <algorithm>
 #include <vector>

 #include <xbyak.h>

-#include "dynarmic/backend/x64/abi.h"
 #include "dynarmic/backend/x64/block_of_code.h"
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/common/iterator_util.h"
@@ -131,4 +132,4 @@ void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc e
 ABI_PopRegistersAndAdjustStack(code, 0, regs);
 }

 } // namespace Dynarmic::Backend::X64
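A pattern worth calling out once, since it repeats in most of the .cpp files in this commit: the file's own header moves into a block of its own directly under the license comment (the added `#include "dynarmic/backend/x64/abi.h"` near the top here, with the matching removal further down). This is clang-format's include regrouping, which treats the main header of a translation unit as its own top-priority category. Schematically:

    // Before: main header mixed into the project-include block
    #include <xbyak.h>
    #include "dynarmic/backend/x64/abi.h"
    #include "dynarmic/backend/x64/block_of_code.h"

    // After: main header first, in its own block
    #include "dynarmic/backend/x64/abi.h"

    #include <xbyak.h>

    #include "dynarmic/backend/x64/block_of_code.h"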
@@ -61,7 +61,7 @@ constexpr std::array<HostLoc, 18> ABI_ALL_CALLEE_SAVE = {
 HostLoc::XMM15,
 };

 constexpr size_t ABI_SHADOW_SPACE = 32; // bytes

 #else

@@ -114,7 +114,7 @@ constexpr std::array<HostLoc, 6> ABI_ALL_CALLEE_SAVE = {
 HostLoc::R15,
 };

 constexpr size_t ABI_SHADOW_SPACE = 0; // bytes

 #endif

@@ -128,4 +128,4 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_si
 void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
 void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);

 } // namespace Dynarmic::Backend::X64
@@ -3,6 +3,15 @@
 * SPDX-License-Identifier: 0BSD
 */

+#include "dynarmic/backend/x64/block_of_code.h"
+
+#ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#else
+# include <sys/mman.h>
+#endif
+
 #include <array>
 #include <cstring>

@@ -10,19 +19,12 @@

 #include "dynarmic/backend/x64/a32_jitstate.h"
 #include "dynarmic/backend/x64/abi.h"
-#include "dynarmic/backend/x64/block_of_code.h"
 #include "dynarmic/backend/x64/hostloc.h"
 #include "dynarmic/backend/x64/perf_map.h"
 #include "dynarmic/backend/x64/stack_layout.h"
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/bit_util.h"

-#ifdef _WIN32
-#include <windows.h>
-#else
-#include <sys/mman.h>
-#endif
-
 namespace Dynarmic::Backend::X64 {

 #ifdef _WIN32
@@ -60,47 +62,66 @@ CustomXbyakAllocator s_allocator;

 #ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
 void ProtectMemory(const void* base, size_t size, bool is_executable) {
-#ifdef _WIN32
+# ifdef _WIN32
 DWORD oldProtect = 0;
 VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
-#else
+# else
 static const size_t pageSize = sysconf(_SC_PAGESIZE);
 const size_t iaddr = reinterpret_cast<size_t>(base);
 const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
 const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
 mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
-#endif
+# endif
 }
 #endif
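The POSIX branch of ProtectMemory has to cope with mprotect's requirement that the address be page-aligned: it rounds the base down to its page and widens the length by however much was rounded over, so the original [base, base + size) range stays covered. The same computation as a standalone, checkable sketch (names invented):

    #include <cstddef>
    #include <cstdint>

    struct Span {
        std::uintptr_t addr;
        std::size_t len;
    };

    // Round addr down to its page and widen len accordingly.
    // page_size must be a power of two, as with _SC_PAGESIZE.
    constexpr Span PageAlign(std::uintptr_t addr, std::size_t len, std::size_t page_size) {
        const std::uintptr_t round_addr = addr & ~(static_cast<std::uintptr_t>(page_size) - 1);
        return {round_addr, len + (addr - round_addr)};
    }

    static_assert(PageAlign(0x1234, 0x10, 0x1000).addr == 0x1000);
    static_assert(PageAlign(0x1234, 0x10, 0x1000).len == 0x244);  // 0x10 + 0x234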

-HostFeature GetHostFeatures()
-{
+HostFeature GetHostFeatures() {
 HostFeature features = {};

 #ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
 using Cpu = Xbyak::util::Cpu;
 Xbyak::util::Cpu cpu_info;

-if (cpu_info.has(Cpu::tSSSE3)) features |= HostFeature::SSSE3;
-if (cpu_info.has(Cpu::tSSE41)) features |= HostFeature::SSE41;
-if (cpu_info.has(Cpu::tSSE42)) features |= HostFeature::SSE42;
-if (cpu_info.has(Cpu::tAVX)) features |= HostFeature::AVX;
-if (cpu_info.has(Cpu::tAVX2)) features |= HostFeature::AVX2;
-if (cpu_info.has(Cpu::tAVX512F)) features |= HostFeature::AVX512F;
-if (cpu_info.has(Cpu::tAVX512CD)) features |= HostFeature::AVX512CD;
-if (cpu_info.has(Cpu::tAVX512VL)) features |= HostFeature::AVX512VL;
-if (cpu_info.has(Cpu::tAVX512BW)) features |= HostFeature::AVX512BW;
-if (cpu_info.has(Cpu::tAVX512DQ)) features |= HostFeature::AVX512DQ;
-if (cpu_info.has(Cpu::tAVX512_BITALG)) features |= HostFeature::AVX512BITALG;
-if (cpu_info.has(Cpu::tPCLMULQDQ)) features |= HostFeature::PCLMULQDQ;
-if (cpu_info.has(Cpu::tF16C)) features |= HostFeature::F16C;
-if (cpu_info.has(Cpu::tFMA)) features |= HostFeature::FMA;
-if (cpu_info.has(Cpu::tAESNI)) features |= HostFeature::AES;
-if (cpu_info.has(Cpu::tPOPCNT)) features |= HostFeature::POPCNT;
-if (cpu_info.has(Cpu::tBMI1)) features |= HostFeature::BMI1;
-if (cpu_info.has(Cpu::tBMI2)) features |= HostFeature::BMI2;
-if (cpu_info.has(Cpu::tLZCNT)) features |= HostFeature::LZCNT;
-if (cpu_info.has(Cpu::tGFNI)) features |= HostFeature::GFNI;
+if (cpu_info.has(Cpu::tSSSE3))
+features |= HostFeature::SSSE3;
+if (cpu_info.has(Cpu::tSSE41))
+features |= HostFeature::SSE41;
+if (cpu_info.has(Cpu::tSSE42))
+features |= HostFeature::SSE42;
+if (cpu_info.has(Cpu::tAVX))
+features |= HostFeature::AVX;
+if (cpu_info.has(Cpu::tAVX2))
+features |= HostFeature::AVX2;
+if (cpu_info.has(Cpu::tAVX512F))
+features |= HostFeature::AVX512F;
+if (cpu_info.has(Cpu::tAVX512CD))
+features |= HostFeature::AVX512CD;
+if (cpu_info.has(Cpu::tAVX512VL))
+features |= HostFeature::AVX512VL;
+if (cpu_info.has(Cpu::tAVX512BW))
+features |= HostFeature::AVX512BW;
+if (cpu_info.has(Cpu::tAVX512DQ))
+features |= HostFeature::AVX512DQ;
+if (cpu_info.has(Cpu::tAVX512_BITALG))
+features |= HostFeature::AVX512BITALG;
+if (cpu_info.has(Cpu::tPCLMULQDQ))
+features |= HostFeature::PCLMULQDQ;
+if (cpu_info.has(Cpu::tF16C))
+features |= HostFeature::F16C;
+if (cpu_info.has(Cpu::tFMA))
+features |= HostFeature::FMA;
+if (cpu_info.has(Cpu::tAESNI))
+features |= HostFeature::AES;
+if (cpu_info.has(Cpu::tPOPCNT))
+features |= HostFeature::POPCNT;
+if (cpu_info.has(Cpu::tBMI1))
+features |= HostFeature::BMI1;
+if (cpu_info.has(Cpu::tBMI2))
+features |= HostFeature::BMI2;
+if (cpu_info.has(Cpu::tLZCNT))
+features |= HostFeature::LZCNT;
+if (cpu_info.has(Cpu::tGFNI))
+features |= HostFeature::GFNI;

 if (cpu_info.has(Cpu::tBMI2)) {
 // BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
@@ -109,7 +130,7 @@ HostFeature GetHostFeatures()
 if (cpu_info.has(Cpu::tAMD)) {
 std::array<u32, 4> data{};
 cpu_info.getCpuid(1, data.data());
-const u32 family_base = Common::Bits< 8, 11>(data[0]);
+const u32 family_base = Common::Bits<8, 11>(data[0]);
 const u32 family_extended = Common::Bits<20, 27>(data[0]);
 const u32 family = family_base + family_extended;
 if (family >= 0x19)
@@ -123,7 +144,7 @@ HostFeature GetHostFeatures()
 return features;
 }

 } // anonymous namespace

 BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, size_t far_code_offset, std::function<void(BlockOfCode&)> rcp)
 : Xbyak::CodeGenerator(total_code_size, nullptr, &s_allocator)
@@ -131,8 +152,7 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_cod
 , jsi(jsi)
 , far_code_offset(far_code_offset)
 , constant_pool(*this, CONSTANT_POOL_SIZE)
-, host_features(GetHostFeatures())
-{
+, host_features(GetHostFeatures()) {
 ASSERT(total_code_size > far_code_offset);
 EnableWriting();
 GenRunCode(rcp);
@@ -210,7 +230,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
 ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));

 mov(r15, ABI_PARAM1);
 mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register

 cb.GetTicksRemaining->EmitCall(*this);
 mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
@@ -368,4 +388,4 @@ void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
 nop(size - current_size);
 }

 } // namespace Dynarmic::Backend::X64
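The family computation in the AMD-specific block above follows the CPUID convention: leaf 1 EAX carries a base family in bits 11:8 and an extended family in bits 27:20, and the displayed family is their sum (on recent AMD parts the base field reads 0xF, so Zen 3 comes out as 0xF + 0xA = 0x19, which is what the `family >= 0x19` test keys on before trusting fast BMI2). Worked standalone; the EAX value below is a hypothetical illustration, not a dump from real silicon:

    #include <cstdint>

    // Displayed CPU family from CPUID leaf 1 EAX:
    // base family in bits 11:8 plus extended family in bits 27:20.
    constexpr std::uint32_t DisplayedFamily(std::uint32_t eax) {
        const std::uint32_t base = (eax >> 8) & 0xF;
        const std::uint32_t ext = (eax >> 20) & 0xFF;
        return base + ext;
    }

    static_assert(DisplayedFamily(0x00A00F00) == 0x19);  // base 0xF + extended 0xA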
@@ -73,12 +73,12 @@ public:
 void LookupBlock();

 /// Code emitter: Calls the function
-template <typename FunctionPointer>
+template<typename FunctionPointer>
 void CallFunction(FunctionPointer fn) {
 static_assert(std::is_pointer_v<FunctionPointer> && std::is_function_v<std::remove_pointer_t<FunctionPointer>>,
 "Supplied type must be a pointer to a function");

 const u64 address = reinterpret_cast<u64>(fn);
 const u64 distance = address - (getCurr<u64>() + 5);

 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
@@ -91,7 +91,7 @@ public:
 }

 /// Code emitter: Calls the lambda. Lambda must not have any captures.
-template <typename Lambda>
+template<typename Lambda>
 void CallLambda(Lambda l) {
 CallFunction(Common::FptrCast(l));
 }
@@ -165,7 +165,7 @@ private:
 CodePtr near_code_ptr;
 CodePtr far_code_ptr;

-using RunCodeFuncType = void(*)(void*, CodePtr);
+using RunCodeFuncType = void (*)(void*, CodePtr);
 RunCodeFuncType run_code = nullptr;
 RunCodeFuncType step_code = nullptr;
 static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;
@@ -176,4 +176,4 @@ private:
 const HostFeature host_features;
 };

 } // namespace Dynarmic::Backend::X64
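The distance test inside CallFunction decides whether a 5-byte `call rel32` can reach the target: `distance` is the target minus the address just past the call, computed with unsigned 64-bit wraparound, and the two-sided comparison is exactly "this does not fit in a signed 32-bit displacement". A standalone restatement of the arithmetic (function name invented):

    #include <cstdint>

    // True when a call rel32 cannot encode (target - rip_after_call).
    // Relies only on unsigned wraparound being two's-complement.
    constexpr bool NeedsIndirectCall(std::uint64_t target, std::uint64_t rip_after_call) {
        const std::uint64_t distance = target - rip_after_call;
        return distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL;
    }

    static_assert(!NeedsIndirectCall(0x1000, 0x2000));        // -0x1000: fits in rel32
    static_assert(NeedsIndirectCall(0x2'0000'0000, 0x1000));  // ~+8 GiB: does not fit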
@@ -3,32 +3,33 @@
 * SPDX-License-Identifier: 0BSD
 */

+#include "dynarmic/backend/x64/block_range_information.h"
+
 #include <boost/icl/interval_map.hpp>
 #include <boost/icl/interval_set.hpp>
 #include <tsl/robin_set.h>

-#include "dynarmic/backend/x64/block_range_information.h"
 #include "dynarmic/common/common_types.h"

 namespace Dynarmic::Backend::X64 {

-template <typename ProgramCounterType>
+template<typename ProgramCounterType>
 void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
 block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
 }

-template <typename ProgramCounterType>
+template<typename ProgramCounterType>
 void BlockRangeInformation<ProgramCounterType>::ClearCache() {
 block_ranges.clear();
 }

-template <typename ProgramCounterType>
+template<typename ProgramCounterType>
 tsl::robin_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
 tsl::robin_set<IR::LocationDescriptor> erase_locations;
 for (auto invalidate_interval : ranges) {
 auto pair = block_ranges.equal_range(invalidate_interval);
 for (auto it = pair.first; it != pair.second; ++it) {
-for (const auto &descriptor : it->second) {
+for (const auto& descriptor : it->second) {
 erase_locations.insert(descriptor);
 }
 }
@@ -40,4 +41,4 @@ tsl::robin_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>
 template class BlockRangeInformation<u32>;
 template class BlockRangeInformation<u64>;

 } // namespace Dynarmic::Backend::X64
@@ -15,7 +15,7 @@

 namespace Dynarmic::Backend::X64 {

-template <typename ProgramCounterType>
+template<typename ProgramCounterType>
 class BlockRangeInformation {
 public:
 void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
@@ -26,4 +26,4 @@ private:
 boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
 };

 } // namespace Dynarmic::Backend::X64
@@ -4,6 +4,7 @@
 */

 #include "dynarmic/backend/x64/callback.h"

 #include "dynarmic/backend/x64/block_of_code.h"

 namespace Dynarmic::Backend::X64 {
@@ -37,4 +38,4 @@ void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<voi
 code.CallFunction(fn);
 }

 } // namespace Dynarmic::Backend::X64
@@ -22,16 +22,23 @@ class Callback {
 public:
 virtual ~Callback();

-virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const = 0;
+void EmitCall(BlockOfCode& code) const {
+EmitCall(code, [](RegList) {});
+}
+
+virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const = 0;
 virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const = 0;
 };

 class SimpleCallback final : public Callback {
 public:
-template <typename Function>
-SimpleCallback(Function fn) : fn(reinterpret_cast<void(*)()>(fn)) {}
+template<typename Function>
+SimpleCallback(Function fn)
+: fn(reinterpret_cast<void (*)()>(fn)) {}

-void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override;
+using Callback::EmitCall;
+
+void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const override;
 void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;

 private:
@@ -40,10 +47,13 @@ private:

 class ArgCallback final : public Callback {
 public:
-template <typename Function>
-ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void(*)()>(fn)), arg(arg) {}
+template<typename Function>
+ArgCallback(Function fn, u64 arg)
+: fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}

-void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override;
+using Callback::EmitCall;
+
+void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const override;
 void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;

 private:
@@ -51,4 +61,4 @@ private:
 u64 arg;
 };

 } // namespace Dynarmic::Backend::X64
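The Callback rework above is the textbook fix for a defaulted argument on a virtual function (defaults bind statically, at the static type of the call): the default moves into a non-virtual convenience overload, and each override adds `using Callback::EmitCall;` so the zero-argument form is not hidden by the override. The pattern in isolation, with hypothetical names:

    #include <functional>

    struct Base {
        virtual ~Base() = default;
        void Run() const { Run([] {}); }  // non-virtual overload supplies the old default
        virtual void Run(std::function<void()> hook) const = 0;
    };

    struct Derived final : Base {
        using Base::Run;  // keep Run() visible despite the override below
        void Run(std::function<void()> hook) const override { hook(); }
    };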
@@ -3,15 +3,17 @@
 * SPDX-License-Identifier: 0BSD
 */

+#include "dynarmic/backend/x64/constant_pool.h"
+
 #include <cstring>

 #include "dynarmic/backend/x64/block_of_code.h"
-#include "dynarmic/backend/x64/constant_pool.h"
 #include "dynarmic/common/assert.h"

 namespace Dynarmic::Backend::X64 {

-ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_size(size) {
+ConstantPool::ConstantPool(BlockOfCode& code, size_t size)
+: code(code), pool_size(size) {
 code.int3();
 code.align(align_size);
 pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size));
@@ -31,4 +33,4 @@ Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 l
 return frame[code.rip + iter->second];
 }

 } // namespace Dynarmic::Backend::X64
@@ -27,7 +27,7 @@ public:
 Xbyak::Address GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);

 private:
 static constexpr size_t align_size = 16; // bytes

 std::map<std::tuple<u64, u64>, void*> constant_info;

@@ -37,4 +37,4 @@ private:
 u8* current_pool_ptr;
 };

 } // namespace Dynarmic::Backend::X64
@@ -19,17 +19,17 @@ namespace Backend::X64 {

 namespace impl {

-template <typename FunctionType, FunctionType mfp>
+template<typename FunctionType, FunctionType mfp>
 struct ThunkBuilder;

-template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
-struct ThunkBuilder<R(C::*)(Args...), mfp> {
+template<typename C, typename R, typename... Args, R (C::*mfp)(Args...)>
+struct ThunkBuilder<R (C::*)(Args...), mfp> {
 static R Thunk(C* this_, Args... args) {
 return (this_->*mfp)(std::forward<Args>(args)...);
 }
 };

 } // namespace impl

 template<auto mfp>
 ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) {
@@ -77,5 +77,5 @@ ArgCallback Devirtualize(mp::class_type<decltype(mfp)>* this_) {
 #endif
 }

 } // namespace Backend::X64
 } // namespace Dynarmic
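ThunkBuilder above is the piece that lets a member function be invoked from JIT-generated code: given a member-function pointer fixed at compile time, it manufactures an ordinary free function whose first parameter is the object pointer, and that free function's address is what gets baked into an ArgCallback together with `this_`. A cut-down standalone version of the same trick (names invented):

    #include <cassert>

    template<typename FunctionType, FunctionType mfp>
    struct Thunk;

    template<typename C, typename R, typename... Args, R (C::*mfp)(Args...)>
    struct Thunk<R (C::*)(Args...), mfp> {
        static R Call(C* this_, Args... args) { return (this_->*mfp)(args...); }
    };

    struct Counter {
        int n = 0;
        int Add(int x) { return n += x; }
    };

    int main() {
        Counter c;
        // &Thunk<...>::Call is a plain R (*)(C*, Args...) function pointer.
        assert(Thunk<decltype(&Counter::Add), &Counter::Add>::Call(&c, 5) == 5);
    }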
@@ -3,12 +3,13 @@
 * SPDX-License-Identifier: 0BSD
 */

+#include "dynarmic/backend/x64/emit_x64.h"
+
 #include <iterator>

 #include <tsl/robin_set.h>

 #include "dynarmic/backend/x64/block_of_code.h"
-#include "dynarmic/backend/x64/emit_x64.h"
 #include "dynarmic/backend/x64/nzcv_util.h"
 #include "dynarmic/backend/x64/perf_map.h"
 #include "dynarmic/backend/x64/stack_layout.h"
@@ -29,7 +30,7 @@ namespace Dynarmic::Backend::X64 {
 using namespace Xbyak::util;

 EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
 : reg_alloc(reg_alloc), block(block) {}

 size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
 return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst)));
@@ -40,7 +41,8 @@ void EmitContext::EraseInstruction(IR::Inst* inst) {
 inst->ClearArgs();
 }

-EmitX64::EmitX64(BlockOfCode& code) : code(code) {
+EmitX64::EmitX64(BlockOfCode& code)
+: code(code) {
 exception_handler.Register(code);
 }

@@ -73,8 +75,8 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I

 const auto iter = block_descriptors.find(target);
 CodePtr target_code_ptr = iter != block_descriptors.end()
 ? iter->second.entrypoint
 : code.GetReturnFromRunCodeAddress();

 code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);

@@ -126,7 +128,7 @@ void EmitX64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
 void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-const int bitsize = [&]{
+const int bitsize = [&] {
 switch (args[0].GetType()) {
 case IR::Type::U8:
 return 8;
@@ -195,64 +197,64 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
 // add al, 0x7F restores OF

 switch (cond) {
 case IR::Cond::EQ: //z
 code.sahf();
 code.jz(pass);
 break;
 case IR::Cond::NE: //!z
 code.sahf();
 code.jnz(pass);
 break;
 case IR::Cond::CS: //c
 code.sahf();
 code.jc(pass);
 break;
 case IR::Cond::CC: //!c
 code.sahf();
 code.jnc(pass);
 break;
 case IR::Cond::MI: //n
 code.sahf();
 code.js(pass);
 break;
 case IR::Cond::PL: //!n
 code.sahf();
 code.jns(pass);
 break;
 case IR::Cond::VS: //v
 code.cmp(al, 0x81);
 code.jo(pass);
 break;
 case IR::Cond::VC: //!v
 code.cmp(al, 0x81);
 code.jno(pass);
 break;
 case IR::Cond::HI: //c & !z
 code.sahf();
 code.cmc();
 code.ja(pass);
 break;
 case IR::Cond::LS: //!c | z
 code.sahf();
 code.cmc();
 code.jna(pass);
 break;
 case IR::Cond::GE: // n == v
 code.cmp(al, 0x81);
 code.sahf();
 code.jge(pass);
 break;
 case IR::Cond::LT: // n != v
 code.cmp(al, 0x81);
 code.sahf();
 code.jl(pass);
 break;
 case IR::Cond::GT: // !z & (n == v)
 code.cmp(al, 0x81);
 code.sahf();
 code.jg(pass);
 break;
 case IR::Cond::LE: // z | (n != v)
 code.cmp(al, 0x81);
 code.sahf();
 code.jle(pass);
@@ -325,7 +327,7 @@ void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>
 code.EnableWriting();
 SCOPE_EXIT { code.DisableWriting(); };

-for (const auto &descriptor : locations) {
+for (const auto& descriptor : locations) {
 const auto it = block_descriptors.find(descriptor);
 if (it == block_descriptors.end()) {
 continue;
@@ -338,4 +340,4 @@ void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>
 }
 }

 } // namespace Dynarmic::Backend::X64
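EmitCond above reconstructs host flags from the guest NZCV byte held in ax: `sahf` reloads SF/ZF/CF from ah, but x64 has no equally cheap way to reload OF, so the V flag is kept in al in an encoding where `cmp al, 0x81` (or `add al, 0x7F`, per the comment at the top of the switch) regenerates OF as a side effect; conditions that need both, such as GE/LT/GT/LE, issue the `cmp` first and the `sahf` second because `sahf` does not touch OF. A small check of the overflow arithmetic, under the assumption (for illustration only) that V is kept in al's low bit:

    #include <cstdint>

    // OF produced by `cmp al, 0x81`, i.e. signed overflow of al - 0x81.
    constexpr bool OverflowOfCmp81(std::uint8_t al) {
        const int a = static_cast<std::int8_t>(al);  // al reinterpreted as signed
        const int diff = a - (-127);                 // 0x81 is -127 as a signed byte
        return diff < -128 || diff > 127;            // outside int8 range == OF set
    }

    static_assert(OverflowOfCmp81(0x01));   // V = 1 -> OF set
    static_assert(!OverflowOfCmp81(0x00));  // V = 0 -> OF clear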
@@ -13,7 +13,6 @@

 #include <tsl/robin_map.h>
 #include <tsl/robin_set.h>

 #include <xbyak_util.h>

 #include "dynarmic/backend/x64/exception_handler.h"
@@ -26,11 +25,11 @@
 namespace Dynarmic::IR {
 class Block;
 class Inst;
 } // namespace Dynarmic::IR

 namespace Dynarmic {
 enum class OptimizationFlag : u32;
 } // namespace Dynarmic

 namespace Dynarmic::Backend::X64 {

@@ -41,10 +40,10 @@ using A64FullVectorWidth = std::integral_constant<size_t, 128>;
 // Array alias that always sizes itself according to the given type T
 // relative to the size of a vector register. e.g. T = u32 would result
 // in a std::array<u32, 4>.
-template <typename T>
+template<typename T>
 using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;

-template <typename T>
+template<typename T>
 using HalfVectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>() / 2>;

 struct EmitContext {
@@ -128,4 +127,4 @@ protected:
 tsl::robin_map<IR::LocationDescriptor, PatchInformation> patch_information;
 };

 } // namespace Dynarmic::Backend::X64
@@ -73,7 +73,7 @@ void EmitX64::EmitAESEncryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
 }

 void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);

 if (code.HasHostFeature(HostFeature::AES)) {
 const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -105,4 +105,4 @@ void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
 EmitAESFunction(args, ctx, code, inst, AES::MixColumns);
 }

 } // namespace Dynarmic::Backend::X64
@@ -145,4 +145,4 @@ void EmitX64::EmitCRC32ISO64(EmitContext& ctx, IR::Inst* inst) {
 EmitCRC32ISO(code, ctx, inst, 64);
 }

 } // namespace Dynarmic::Backend::X64
@@ -24,7 +24,7 @@ void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) {
 const Xbyak::Reg64 hi = ctx.reg_alloc.UseScratchGpr(args[1]);

 code.shl(hi, 32);
 code.mov(lo.cvt32(), lo.cvt32()); // Zero extend to 64-bits
 code.or_(lo, hi);

 ctx.reg_alloc.DefineValue(inst, lo);
@@ -146,64 +146,64 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
 // add al, 0x7F restores OF

 switch (args[0].GetImmediateCond()) {
 case IR::Cond::EQ: //z
 code.sahf();
 code.cmovz(else_, then_);
 break;
 case IR::Cond::NE: //!z
 code.sahf();
 code.cmovnz(else_, then_);
 break;
 case IR::Cond::CS: //c
 code.sahf();
 code.cmovc(else_, then_);
 break;
 case IR::Cond::CC: //!c
 code.sahf();
 code.cmovnc(else_, then_);
 break;
 case IR::Cond::MI: //n
 code.sahf();
 code.cmovs(else_, then_);
 break;
 case IR::Cond::PL: //!n
 code.sahf();
 code.cmovns(else_, then_);
 break;
 case IR::Cond::VS: //v
 code.cmp(nzcv.cvt8(), 0x81);
 code.cmovo(else_, then_);
 break;
 case IR::Cond::VC: //!v
 code.cmp(nzcv.cvt8(), 0x81);
 code.cmovno(else_, then_);
 break;
 case IR::Cond::HI: //c & !z
 code.sahf();
 code.cmc();
 code.cmova(else_, then_);
 break;
 case IR::Cond::LS: //!c | z
 code.sahf();
 code.cmc();
 code.cmovna(else_, then_);
 break;
 case IR::Cond::GE: // n == v
 code.cmp(nzcv.cvt8(), 0x81);
 code.sahf();
 code.cmovge(else_, then_);
 break;
 case IR::Cond::LT: // n != v
 code.cmp(nzcv.cvt8(), 0x81);
 code.sahf();
 code.cmovl(else_, then_);
 break;
 case IR::Cond::GT: // !z & (n == v)
 code.cmp(nzcv.cvt8(), 0x81);
 code.sahf();
 code.cmovg(else_, then_);
 break;
 case IR::Cond::LE: // z | (n != v)
 code.cmp(nzcv.cvt8(), 0x81);
 code.sahf();
 code.cmovle(else_, then_);
@@ -814,7 +814,7 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
 ctx.reg_alloc.DefineValue(inst, result);
 }

-template <typename ShfitFT, typename BMI2FT>
+template<typename ShfitFT, typename BMI2FT>
 static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 auto& operand_arg = args[0];
@@ -851,7 +851,7 @@ static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
 ctx.reg_alloc.DefineValue(inst, result);
 }

-template <typename ShfitFT, typename BMI2FT>
+template<typename ShfitFT, typename BMI2FT>
 static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 auto& operand_arg = args[0];
@@ -889,35 +889,43 @@ static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
 }

 void EmitX64::EmitLogicalShiftLeftMasked32(EmitContext& ctx, IR::Inst* inst) {
-EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
+EmitMaskedShift32(
+code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
 }

 void EmitX64::EmitLogicalShiftLeftMasked64(EmitContext& ctx, IR::Inst* inst) {
-EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
+EmitMaskedShift64(
+code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
 }

 void EmitX64::EmitLogicalShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
-EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
+EmitMaskedShift32(
+code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
 }

 void EmitX64::EmitLogicalShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
-EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
+EmitMaskedShift64(
+code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
 }

 void EmitX64::EmitArithmeticShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
-EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
+EmitMaskedShift32(
+code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
 }

 void EmitX64::EmitArithmeticShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
-EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
+EmitMaskedShift64(
+code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
 }

 void EmitX64::EmitRotateRightMasked32(EmitContext& ctx, IR::Inst* inst) {
-EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
+EmitMaskedShift32(
+code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
 }

 void EmitX64::EmitRotateRightMasked64(EmitContext& ctx, IR::Inst* inst) {
-EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
+EmitMaskedShift64(
+code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
 }

 static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) {
@@ -1132,25 +1140,25 @@ void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) {
 }

 void EmitX64::EmitUnsignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);

 ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
 ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX);
 OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
 code.mul(*op_arg);

 ctx.reg_alloc.DefineValue(inst, rdx);
 }

 void EmitX64::EmitSignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);

 ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
 ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX);
 OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
 code.imul(*op_arg);

 ctx.reg_alloc.DefineValue(inst, rdx);
 }

 void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {
@@ -1441,7 +1449,7 @@ void EmitX64::EmitZeroExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitZeroExtendWordToLong(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
 code.mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move
 ctx.reg_alloc.DefineValue(inst, result);
 }

@@ -1505,27 +1513,27 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) {
 }

 void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 if (code.HasHostFeature(HostFeature::LZCNT)) {
 const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64();
 const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();

 code.lzcnt(result, source);

 ctx.reg_alloc.DefineValue(inst, result);
 } else {
 const Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64();
 const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();

 // The result of a bsr of zero is undefined, but zf is set after it.
 code.bsr(result, source);
 code.mov(source.cvt32(), 0xFFFFFFFF);
 code.cmovz(result.cvt32(), source.cvt32());
 code.neg(result.cvt32());
 code.add(result.cvt32(), 63);

 ctx.reg_alloc.DefineValue(inst, result);
 }
 }

 void EmitX64::EmitMaxSigned32(EmitContext& ctx, IR::Inst* inst) {
@@ -1624,4 +1632,4 @@ void EmitX64::EmitMinUnsigned64(EmitContext& ctx, IR::Inst* inst) {
 ctx.reg_alloc.DefineValue(inst, y);
 }

 } // namespace Dynarmic::Backend::X64
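The non-LZCNT path of EmitCountLeadingZeros64 above leans on two x86 quirks: `bsr` returns the index of the highest set bit but leaves its destination undefined on zero input (setting ZF instead), so the `cmovz` patches in 0xFFFFFFFF, and then `neg` plus `add 63` turns an index i into 63 - i, with the patched -1 yielding 64 for zero. The equivalent computation in plain C++:

    #include <cstdint>

    // Mirror of the bsr fallback: clz(x) = 63 - bsr(x), with bsr(0)
    // substituted by -1 (the cmovz of 0xFFFFFFFF) so zero maps to 64.
    constexpr std::uint32_t Clz64(std::uint64_t x) {
        int highest = -1;
        for (int i = 63; i >= 0; --i) {
            if ((x >> i) & 1) {  // bsr: index of the highest set bit
                highest = i;
                break;
            }
        }
        return static_cast<std::uint32_t>(63 - highest);
    }

    static_assert(Clz64(1) == 63);
    static_assert(Clz64(0x8000'0000'0000'0000ULL) == 0);
    static_assert(Clz64(0) == 64);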
@@ -51,32 +51,32 @@ constexpr u64 f64_nan = 0x7ff8000000000000u;
 constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;
 constexpr u64 f64_smallest_normal = 0x0010000000000000u;

 constexpr u64 f64_min_s16 = 0xc0e0000000000000u; // -32768 as a double
 constexpr u64 f64_max_s16 = 0x40dfffc000000000u; // 32767 as a double
 constexpr u64 f64_min_u16 = 0x0000000000000000u; // 0 as a double
 constexpr u64 f64_max_u16 = 0x40efffe000000000u; // 65535 as a double
 constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
 constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
 constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
 constexpr u64 f64_max_s64_lim = 0x43e0000000000000u; // 2^63 as a double (actual maximum unrepresentable)
 constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double
 constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable)

 #define FCODE(NAME) \
-[&code](auto... args){ \
+[&code](auto... args) { \
 if constexpr (fsize == 32) { \
 code.NAME##s(args...); \
 } else { \
 code.NAME##d(args...); \
 } \
 }
 #define ICODE(NAME) \
-[&code](auto... args){ \
+[&code](auto... args) { \
 if constexpr (fsize == 32) { \
 code.NAME##d(args...); \
 } else { \
 code.NAME##q(args...); \
 } \
 }
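Only brace spacing changes in FCODE/ICODE above, but the macros deserve a gloss: each wraps one emitter name in a lambda that appends the width suffix chosen by the enclosing template's `fsize`, so scalar-float code can be written once for both f32 and f64 (`FCODE(cmpords)`, used a few hunks below, dispatches to `cmpordss` or `cmpordsd`; ICODE does the same with the `d`/`q` integer suffixes). A compilable toy with stand-in emitters (the Emitter type and its methods are invented):

    #include <cstddef>
    #include <iostream>

    struct Emitter {
        void addss() { std::cout << "addss\n"; }  // stand-ins for the suffixed
        void addsd() { std::cout << "addsd\n"; }  // xbyak emitter methods
    };

    #define FCODE(NAME)                  \
        [&code](auto... args) {          \
            if constexpr (fsize == 32) { \
                code.NAME##s(args...);   \
            } else {                     \
                code.NAME##d(args...);   \
            }                            \
        }

    template<std::size_t fsize>
    void EmitAdd(Emitter& code) {
        FCODE(adds)();  // "adds" + "s"/"d" -> addss or addsd
    }

    int main() {
        Emitter e;
        EmitAdd<32>(e);  // prints addss
        EmitAdd<64>(e);  // prints addsd
    }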

 std::optional<int> ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_mode) {
@@ -117,7 +117,7 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
 template<size_t fsize>
 void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
 code.xorps(xmm_scratch, xmm_scratch);
 FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
 code.pand(xmm_value, xmm_scratch);
 }

@@ -248,7 +248,7 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
 code.jmp(end, code.T_NEAR);
 }

-template <size_t fsize, typename Function>
+template<size_t fsize, typename Function>
 void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);

@@ -276,7 +276,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
 ctx.reg_alloc.DefineValue(inst, result);
 }

-template <size_t fsize, typename Function>
+template<size_t fsize, typename Function>
 void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
 using FPT = mp::unsigned_integer_of_size<fsize>;

@@ -331,7 +331,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
 ctx.reg_alloc.DefineValue(inst, result);
 }

 } // anonymous namespace

 void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -459,7 +459,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);

 const Xbyak::Xmm op1 = ctx.reg_alloc.UseScratchXmm(args[0]);
 const Xbyak::Xmm op2 = ctx.reg_alloc.UseScratchXmm(args[1]); // Result stored here!
 Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
 tmp.setBit(fsize);

@@ -793,7 +793,7 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
 EmitFPRecipEstimate<64>(code, ctx, inst);
 }

-template <size_t fsize>
+template<size_t fsize>
 static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 using FPT = mp::unsigned_integer_of_size<fsize>;

@@ -930,8 +930,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
 mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
 mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
 mp::lift_value<FP::RoundingMode::TowardsZero>,
-mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>
->;
+mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
 using exact_list = mp::list<std::true_type, std::false_type>;

 static const auto lut = Common::GenerateLookupTableFromList(
@@ -947,12 +946,9 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
 using InputSize = mp::unsigned_integer_of_size<fsize>;

 return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, rounding_mode, exact, fpsr);
-}
-)
-};
+})};
 },
-mp::cartesian_product<fsize_list, rounding_list, exact_list>{}
-);
+mp::cartesian_product<fsize_list, rounding_list, exact_list>{});

 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 ctx.reg_alloc.HostCall(inst, args[0]);
@@ -1267,7 +1263,7 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
 }

 static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
 ctx.reg_alloc.ScratchGpr(HostLoc::RCX); // shifting requires use of cl
 const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr();

 // x64 flags ARM flags
@@ -1287,7 +1283,7 @@ static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {

 code.mov(nzcv, 0x0101'4100'8000'0100);
 code.sete(cl);
 code.rcl(cl, 5); // cl = ZF:CF:0000
 code.shr(nzcv, cl);

 return nzcv;
|
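A note on the SetFpscrNzcvFromFlags hunk above: the 64-bit immediate is a four-entry lookup table of 16-bit NZCV encodings, and the sete/rcl pair turns the host ZF/CF flags into a shift amount that selects one entry. A minimal scalar model of the idea (illustrative names, not from the source):

#include <cstdint>

// Entries, from low to high 16-bit slot: greater (ZF=0,CF=0) -> 0x0100,
// less (ZF=0,CF=1) -> 0x8000, equal (ZF=1,CF=0) -> 0x4100,
// unordered (ZF=1,CF=1) -> 0x0101.
uint16_t LookupNzcvEncoding(bool zf, bool cf) {
    const uint64_t table = 0x0101'4100'8000'0100;
    const unsigned shift = (zf ? 32u : 0u) + (cf ? 16u : 0u);  // mirrors cl = ZF:CF:0000
    return static_cast<uint16_t>(table >> shift);
}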
@@ -1467,7 +1463,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     if constexpr (fsize != 16) {
         const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);

-        if (code.HasHostFeature(HostFeature::SSE41) && round_imm){
+        if (code.HasHostFeature(HostFeature::SSE41) && round_imm) {
             const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
             const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
             const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
@@ -1512,7 +1508,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 code.jmp(end);
                 code.L(below_max);
             }
-            code.cvttsd2si(result, src); // 64 bit gpr
+            code.cvttsd2si(result, src);  // 64 bit gpr
             code.L(end);

             code.SwitchToFarCode();
@@ -1524,14 +1520,14 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
                 if (unsigned_) {
                     code.maxsd(src, code.MConst(xword, f64_min_u32));
-                    code.cvttsd2si(result, src); // 64 bit gpr
+                    code.cvttsd2si(result, src);  // 64 bit gpr
                 } else {
                     code.cvttsd2si(result.cvt32(), src);
                 }
             } else {
                 code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u16 : f64_max_s16));
                 code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u16 : f64_min_s16));
-                code.cvttsd2si(result, src); // 64 bit gpr
+                code.cvttsd2si(result, src);  // 64 bit gpr
             }

             ctx.reg_alloc.DefineValue(inst, result);
@@ -1546,8 +1542,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
         mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
         mp::lift_value<FP::RoundingMode::TowardsZero>,
-        mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>
-    >;
+        mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;

     static const auto lut = Common::GenerateLookupTableFromList(
         [](auto args) {
@@ -1561,12 +1556,9 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                         using FPT = mp::unsigned_integer_of_size<fsize>;

                         return FP::FPToFixed<FPT>(isize, static_cast<FPT>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
-                    }
-                )
-            };
+                    })};
         },
-        mp::cartesian_product<fbits_list, rounding_list>{}
-    );
+        mp::cartesian_product<fbits_list, rounding_list>{});

     ctx.reg_alloc.HostCall(inst, args[0]);
     code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
@@ -1653,7 +1645,7 @@ void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
-    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());  // Not required

     code.movsx(tmp, from);
     code.cvtsi2ss(result, tmp);
@@ -1673,7 +1665,7 @@ void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
-    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());  // Not required

     code.movzx(tmp, from);
     code.cvtsi2ss(result, tmp);
@@ -1718,14 +1710,14 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());

-    const auto op = [&]{
+    const auto op = [&] {
         if (code.HasHostFeature(HostFeature::AVX512F)) {
             const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
             code.vcvtusi2ss(result, result, from.cvt32());
         } else {
             // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
             const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
-            code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
+            code.mov(from.cvt32(), from.cvt32());  // TODO: Verify if this is necessary
             code.cvtsi2ss(result, from);
         }
     };
@@ -1754,7 +1746,7 @@ void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
-    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());  // Not required

     code.movsx(tmp, from);
     code.cvtsi2sd(result, tmp);
@@ -1774,7 +1766,7 @@ void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
-    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());  // Not required

     code.movzx(tmp, from);
     code.cvtsi2sd(result, tmp);
@@ -1793,7 +1785,7 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32();
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
-    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());  // Not required

     code.cvtsi2sd(result, from);

@@ -1810,7 +1802,7 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {

     const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
-    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());  // Not required

     if (code.HasHostFeature(HostFeature::AVX512F)) {
         const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
@@ -1818,7 +1810,7 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     } else {
         // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
         const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
-        code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
+        code.mov(from.cvt32(), from.cvt32());  // TODO: Verify if this is necessary
         code.cvtsi2sd(to, from);
     }

@@ -1943,4 +1935,4 @@ void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {

     ctx.reg_alloc.DefineValue(inst, result);
 }
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -98,7 +98,7 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
         code.movdqa(tmp_b, xmm_b);
         code.paddw(tmp_a, code.MConst(xword, 0x80008000));
         code.paddw(tmp_b, code.MConst(xword, 0x80008000));
-        code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
+        code.pcmpgtw(tmp_b, tmp_a);  // *Signed* comparison!

         ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
         ctx.EraseInstruction(ge_inst);
@@ -205,7 +205,7 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

         code.movdqa(xmm_ge, xmm_a);
-        code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
+        code.pmaxuw(xmm_ge, xmm_b);  // Requires SSE 4.1
         code.pcmpeqw(xmm_ge, xmm_a);

         code.psubw(xmm_a, xmm_b);
@@ -226,7 +226,7 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
         code.paddw(xmm_a, code.MConst(xword, 0x80008000));
         code.paddw(xmm_b, code.MConst(xword, 0x80008000));
         code.movdqa(xmm_ge, xmm_b);
-        code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
+        code.pcmpgtw(xmm_ge, xmm_a);  // *Signed* comparison!
         code.pxor(xmm_ge, ones);

         code.psubw(xmm_a, xmm_b);
@@ -709,4 +709,4 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
     }
 }

-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -106,7 +106,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
     ctx.reg_alloc.DefineValue(inst, addend);
 }

-} // anonymous namespace
+}  // anonymous namespace

 void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
     EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
@@ -312,4 +312,4 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }

-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -17,4 +17,4 @@ void EmitX64::EmitSM4AccessSubstitutionBox(EmitContext& ctx, IR::Inst* inst) {
     code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox);
 }

-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -25,7 +25,7 @@ namespace Dynarmic::Backend::X64 {

 using namespace Xbyak::util;

-template <typename Function>
+template<typename Function>
 static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

@@ -37,7 +37,7 @@ static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

-template <typename Function>
+template<typename Function>
 static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

@@ -49,7 +49,7 @@ static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

-template <typename Lambda>
+template<typename Lambda>
 static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
     constexpr u32 stack_space = 2 * 16;
@@ -72,7 +72,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
     ctx.reg_alloc.DefineValue(inst, result);
 }

-template <typename Lambda>
+template<typename Lambda>
 static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
     constexpr u32 stack_space = 2 * 16;
@@ -97,7 +97,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
     ctx.reg_alloc.DefineValue(inst, result);
 }

-template <typename Lambda>
+template<typename Lambda>
 static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
     constexpr u32 stack_space = 3 * 16;
@@ -125,7 +125,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
     ctx.reg_alloc.DefineValue(inst, result);
 }

-template <typename Lambda>
+template<typename Lambda>
 static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
     constexpr u32 stack_space = 3 * 16;
@@ -168,7 +168,7 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) {
         if (index % 2 == 1) {
             code.shr(dest, 8);
         } else {
-            code.and_(dest, 0xFF); // TODO: Remove when zext handling is corrected
+            code.and_(dest, 0xFF);  // TODO: Remove when zext handling is corrected
         }
     }

@@ -441,8 +441,8 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
 static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
     if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
         const u64 shift_matrix = shift_amount < 8
-            ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
-            : 0x8080808080808080;
+                                     ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
+                                     : 0x8080808080808080;
         code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
         return;
     }
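On the GFNI path above: vgf2p8affineqb multiplies each byte, viewed as a bit vector over GF(2), by the 8x8 bit matrix packed into shift_matrix, which lets a per-byte arithmetic shift be encoded as one constant; shift amounts of 8 or more saturate to pure sign replication. The scalar behaviour it has to reproduce is simply this (reference sketch, not taken from the diff):

#include <cstdint>

// Per-byte arithmetic shift right; shift amounts >= 8 replicate the sign bit.
uint8_t ArithmeticShiftRightByteRef(uint8_t value, uint8_t shift_amount) {
    const int8_t s = static_cast<int8_t>(value);
    return static_cast<uint8_t>(shift_amount < 8 ? s >> shift_amount : s >> 7);
}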
@@ -513,7 +513,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst)
     ctx.reg_alloc.DefineValue(inst, result);
 }

-template <typename T>
+template<typename T>
 static constexpr T VShift(T x, T y) {
     const s8 shift_amount = static_cast<s8>(static_cast<u8>(y));
     const s64 bit_size = static_cast<s64>(Common::BitSize<T>());
@@ -740,7 +740,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }

-template <typename T>
+template<typename T>
 static void EmitVectorCountLeadingZeros(VectorArray<T>& result, const VectorArray<T>& data) {
     for (size_t i = 0; i < result.size(); i++) {
         T element = data[i];
@@ -1875,7 +1875,7 @@ void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) {
         return;
     }

-    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b){
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b) {
         std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
     });
 }
@@ -1955,7 +1955,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
         return;
     }

-    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b){
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b) {
         std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
     });
 }
@@ -2184,7 +2184,7 @@ void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) {
         code.paddd(xmm_a, tmp);
         code.pxor(tmp, tmp);
         code.psrad(xmm_a, 16);
-        code.packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above.
+        code.packssdw(xmm_a, tmp);  // Note: packusdw is SSE4.1, hence the arithmetic shift above.
     }

     ctx.reg_alloc.DefineValue(inst, xmm_a);
@@ -2413,7 +2413,7 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins
     ctx.reg_alloc.DefineValue(inst, a);
 }

-template <typename T, typename Function>
+template<typename T, typename Function>
 static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y, Function fn) {
     const size_t range = x.size() / 2;

@@ -2426,12 +2426,12 @@ static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, con
     }
 }

-template <typename T>
+template<typename T>
 static void PairedMax(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
     PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); });
 }

-template <typename T>
+template<typename T>
 static void PairedMin(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
     PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); });
 }
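PairedOperation above captures ARM's paired lane arithmetic: adjacent pairs of the first operand produce the low half of the result and adjacent pairs of the second operand produce the high half. A worked example with illustrative values:

#include <algorithm>
#include <array>
#include <cstdio>

// Scalar model of PairedMax on 4-lane inputs (illustration only).
int main() {
    const std::array<int, 4> x{1, 9, 3, 4};
    const std::array<int, 4> y{5, 6, 8, 7};
    std::array<int, 4> result{};
    const std::size_t range = x.size() / 2;
    for (std::size_t i = 0; i < range; i++) {
        result[i] = std::max(x[2 * i], x[2 * i + 1]);          // pairs of x -> low half
        result[range + i] = std::max(y[2 * i], y[2 * i + 1]);  // pairs of y -> high half
    }
    std::printf("%d %d %d %d\n", result[0], result[1], result[2], result[3]);  // prints: 9 4 6 8
}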
@@ -2606,7 +2606,7 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
     }
 }

-template <typename D, typename T>
+template<typename D, typename T>
 static D PolynomialMultiply(T lhs, T rhs) {
     constexpr size_t bit_size = Common::BitSize<T>();
     const std::bitset<bit_size> operand(lhs);
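PolynomialMultiply above is a carry-less multiply: partial products are combined with XOR rather than addition, i.e. multiplication of polynomials over GF(2). A compact scalar sketch of the same operation (assumed equivalent, not lifted from the source):

#include <cstdint>

// 8x8 -> 16 bit carry-less (polynomial) multiplication.
uint16_t PolyMul8(uint8_t lhs, uint8_t rhs) {
    uint16_t result = 0;
    for (int i = 0; i < 8; i++) {
        if (lhs & (1u << i)) {
            result ^= static_cast<uint16_t>(rhs) << i;  // XOR in place of add: no carries
        }
    }
    return result;
}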
@@ -2762,8 +2762,8 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
         code.movdqa(high_a, low_a);
         code.psrlw(high_a, 4);
         code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F));
-        code.pand(high_a, tmp1); // High nibbles
-        code.pand(low_a, tmp1); // Low nibbles
+        code.pand(high_a, tmp1);  // High nibbles
+        code.pand(low_a, tmp1);   // Low nibbles

         code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201));
         code.movdqa(tmp2, tmp1);
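The two MConst values in the population-count hunk are a 0x0F nibble mask and a 16-entry table holding popcount(0) through popcount(15); each byte is split into nibbles, both halves are looked up and summed (the vector code does this per byte lane). Scalar model of the trick:

#include <cstdint>
#include <cstdio>

// table[i] == popcount(i); matches the byte layout of
// MConst(xword, 0x0302020102010100, 0x0403030203020201).
static const uint8_t table[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};

uint8_t PopCountByte(uint8_t b) {
    return table[b & 0xF] + table[b >> 4];
}

int main() {
    std::printf("%u\n", PopCountByte(0xB7));  // 0b1011'0111 -> prints 6
}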
@@ -2930,7 +2930,7 @@ void EmitX64::EmitVectorRoundingHalvingAddU32(EmitContext& ctx, IR::Inst* inst)
     EmitVectorRoundingHalvingAddUnsigned(32, ctx, inst, code);
 }

-template <typename T, typename U>
+template<typename T, typename U>
 static void RoundingShiftLeft(VectorArray<T>& out, const VectorArray<T>& lhs, const VectorArray<U>& rhs) {
     using signed_type = std::make_signed_t<T>;
     using unsigned_type = std::make_unsigned_t<T>;
@@ -2947,8 +2947,7 @@ static void RoundingShiftLeft(VectorArray<T>& out, const VectorArray<T>& lhs, co
                 out[i] = static_cast<T>(static_cast<unsigned_type>(lhs[i]) << extended_shift);
             }
         } else {
-            if ((std::is_unsigned_v<T> && extended_shift < -bit_size) ||
-                (std::is_signed_v<T> && extended_shift <= -bit_size)) {
+            if ((std::is_unsigned_v<T> && extended_shift < -bit_size) || (std::is_signed_v<T> && extended_shift <= -bit_size)) {
                 out[i] = 0;
             } else {
                 const s64 shift_value = -extended_shift - 1;
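For a negative shift amount, RoundingShiftLeft above reduces to a rounding right shift: add half of the weight of the bits about to be discarded, then shift. The shift_value computed in the hunk is that n - 1 exponent. Semantics sketch (not dynarmic's exact fallback; assumes 0 < n < 63):

#include <cstdint>
#include <cstdio>

// Round-to-nearest arithmetic shift right by n.
int64_t RoundingShiftRight(int64_t x, unsigned n) {
    return (x + (int64_t{1} << (n - 1))) >> n;
}

int main() {
    std::printf("%lld\n", static_cast<long long>(RoundingShiftRight(7, 2)));   //  7/4 = 1.75  -> 2
    std::printf("%lld\n", static_cast<long long>(RoundingShiftRight(-7, 2)));  // -7/4 = -1.75 -> -2
}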
@@ -3350,7 +3349,6 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
     ctx.reg_alloc.DefineValue(inst, data);
 }

-
 void EmitX64::EmitVectorSignedSaturatedAbs8(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorSignedSaturatedAbs(8, code, ctx, inst);
 }
@@ -3869,7 +3867,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo
         break;
     case 32:
         ASSERT(code.HasHostFeature(HostFeature::SSE41));
-        code.packusdw(dest, dest); // SSE4.1
+        code.packusdw(dest, dest);  // SSE4.1
         code.movdqa(reconstructed, dest);
         code.punpcklwd(reconstructed, zero);
         break;
@@ -4024,10 +4022,10 @@ void EmitX64::EmitVectorSignedSaturatedNeg64(EmitContext& ctx, IR::Inst* inst) {
 // MSVC requires the capture within the saturate lambda, but it's
 // determined to be unnecessary via clang and GCC.
 #ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-lambda-capture"
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wunused-lambda-capture"
 #endif
-template <typename T, typename U = std::make_unsigned_t<T>>
+template<typename T, typename U = std::make_unsigned_t<T>>
 static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
     static_assert(std::is_signed_v<T>, "T must be signed.");

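VectorSignedSaturatedShiftLeft, introduced above, clamps on overflow and reports whether any lane clamped; that report becomes the sticky QC flag. One lane of the idea, sketched for an s16 lane under stated assumptions rather than copied from the fallback:

#include <cstdint>

// Returns true when the lane saturated. Negative shifts are plain
// arithmetic right shifts and never saturate.
bool SignedSaturatedShiftLeftLane(int16_t& out, int16_t x, int8_t shift) {
    if (shift <= 0) {
        const int amount = -shift < 15 ? -shift : 15;
        out = static_cast<int16_t>(x >> amount);
        return false;
    }
    // Widen first; any shift of 16 or more already saturates every nonzero value.
    const int64_t wide = static_cast<int64_t>(x) << (shift < 16 ? shift : 16);
    if (wide > INT16_MAX) {
        out = INT16_MAX;
        return true;
    }
    if (wide < INT16_MIN) {
        out = INT16_MIN;
        return true;
    }
    out = static_cast<int16_t>(wide);
    return false;
}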
@@ -4066,7 +4064,7 @@ static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArra
     return qc_flag;
 }
 #ifdef __clang__
-#pragma clang diagnostic pop
+#    pragma clang diagnostic pop
 #endif

 void EmitX64::EmitVectorSignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
@@ -4085,7 +4083,7 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* i
     EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s64>);
 }

-template <typename T, typename U = std::make_unsigned_t<T>>
+template<typename T, typename U = std::make_unsigned_t<T>>
 static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
     static_assert(std::is_signed_v<T>, "T must be signed.");

@@ -4166,7 +4164,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());

-    const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); });
+    const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem) { return !elem.IsVoid(); });
     const bool is_defaults_zero = inst->GetArg(0).IsZero();

     // TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B)
@@ -4318,8 +4316,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
                 result[i] = table[index][elem];
             }
         }
-    }
-    );
+    });

     code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]);
     ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
@@ -4333,7 +4330,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());

-    const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); });
+    const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem) { return !elem.IsVoid(); });
     const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector;

     // TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B)
@@ -4448,8 +4445,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
                 result[i] = table[index][elem];
             }
         }
-    }
-    );
+    });

     code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]);
     ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
@@ -4732,7 +4728,7 @@ void EmitX64::EmitVectorUnsignedRecipSqrtEstimate(EmitContext& ctx, IR::Inst* in

 // Simple generic case for 8, 16, and 32-bit values. 64-bit values
 // will need to be special-cased as we can't simply use a larger integral size.
-template <typename T, typename U = std::make_unsigned_t<T>>
+template<typename T, typename U = std::make_unsigned_t<T>>
 static bool EmitVectorUnsignedSaturatedAccumulateSigned(VectorArray<U>& result, const VectorArray<T>& lhs, const VectorArray<T>& rhs) {
     static_assert(std::is_signed_v<T>, "T must be signed.");
     static_assert(Common::BitSize<T>() < 64, "T must be less than 64 bits in size.");
@@ -4833,7 +4829,7 @@ void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* in
     });
 }

-template <typename T, typename S = std::make_signed_t<T>>
+template<typename T, typename S = std::make_signed_t<T>>
 static bool VectorUnsignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
     static_assert(std::is_unsigned_v<T>, "T must be an unsigned type.");

@@ -4937,7 +4933,7 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    code.movq(a, a); // TODO: !IsLastUse
+    code.movq(a, a);  // TODO: !IsLastUse

     ctx.reg_alloc.DefineValue(inst, a);
 }
@@ -4948,4 +4944,4 @@ void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }

-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -36,21 +36,21 @@ using namespace Xbyak::util;

 namespace {

 #define FCODE(NAME)                  \
-    [&code](auto... args){           \
+    [&code](auto... args) {          \
         if constexpr (fsize == 32) { \
             code.NAME##s(args...);   \
         } else {                     \
             code.NAME##d(args...);   \
         }                            \
     }
 #define ICODE(NAME)                  \
-    [&code](auto... args){           \
+    [&code](auto... args) {          \
         if constexpr (fsize == 32) { \
             code.NAME##d(args...);   \
         } else {                     \
             code.NAME##q(args...);   \
         }                            \
     }

 template<typename Lambda>
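Context for the macro hunk above: FCODE and ICODE are used throughout this file to pick the operand-size variant of an instruction from the enclosing template's fsize. A hypothetical use, not part of the diff:

// With fsize == 32 this emits addps; with fsize == 64, addpd.
template<size_t fsize>
static void ExampleAdd(BlockOfCode& code, const Xbyak::Xmm& a, const Xbyak::Xmm& b) {
    FCODE(addp)(a, b);
}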
@ -71,7 +71,7 @@ struct NaNHandler {
|
||||||
public:
|
public:
|
||||||
using FPT = mp::unsigned_integer_of_size<fsize>;
|
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||||
|
|
||||||
using function_type = void(*)(std::array<VectorArray<FPT>, narg>&, FP::FPCR);
|
using function_type = void (*)(std::array<VectorArray<FPT>, narg>&, FP::FPCR);
|
||||||
|
|
||||||
static function_type GetDefault() {
|
static function_type GetDefault() {
|
||||||
return GetDefaultImpl(std::make_index_sequence<narg - 1>{});
|
return GetDefaultImpl(std::make_index_sequence<narg - 1>{});
|
||||||
|
@ -294,13 +294,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
||||||
|
|
||||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
(code.*fn)(result);
|
(code.*fn)(result);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
result = ctx.reg_alloc.ScratchXmm();
|
result = ctx.reg_alloc.ScratchXmm();
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
fn(result, xmm_a);
|
fn(result, xmm_a);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -337,7 +337,8 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
||||||
}
|
}
|
||||||
|
|
||||||
enum CheckInputNaN {
|
enum CheckInputNaN {
|
||||||
Yes, No,
|
Yes,
|
||||||
|
No,
|
||||||
};
|
};
|
||||||
|
|
||||||
template<size_t fsize, template<typename> class Indexer, typename Function>
|
template<size_t fsize, template<typename> class Indexer, typename Function>
|
||||||
|
@ -352,11 +353,11 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
|
||||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
(code.*fn)(xmm_a, xmm_b);
|
(code.*fn)(xmm_a, xmm_b);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
fn(xmm_a, xmm_b);
|
fn(xmm_a, xmm_b);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -549,7 +550,7 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
@ -614,7 +615,7 @@ void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
||||||
code.cmpeqps(a, b);
|
code.cmpeqps(a, b);
|
||||||
});
|
});
|
||||||
|
@ -628,7 +629,7 @@ void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
||||||
code.cmpeqpd(a, b);
|
code.cmpeqpd(a, b);
|
||||||
});
|
});
|
||||||
|
@ -644,7 +645,7 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||||
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
code.cvtdq2ps(xmm, xmm);
|
code.cvtdq2ps(xmm, xmm);
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
|
code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
|
||||||
|
@ -662,7 +663,7 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||||
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||||
code.vcvtqq2pd(xmm, xmm);
|
code.vcvtqq2pd(xmm, xmm);
|
||||||
} else if (code.HasHostFeature(HostFeature::SSE41)) {
|
} else if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||||
|
@ -713,7 +714,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
|
||||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||||
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
|
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
|
||||||
code.vcvtudq2ps(xmm, xmm);
|
code.vcvtudq2ps(xmm, xmm);
|
||||||
} else {
|
} else {
|
||||||
|
@ -763,7 +764,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
|
||||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||||
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||||
code.vcvtuqq2pd(xmm, xmm);
|
code.vcvtuqq2pd(xmm, xmm);
|
||||||
} else {
|
} else {
|
||||||
|
@ -828,7 +829,7 @@ void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
||||||
code.cmpltps(b, a);
|
code.cmpltps(b, a);
|
||||||
});
|
});
|
||||||
|
@ -842,7 +843,7 @@ void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
||||||
code.cmpltpd(b, a);
|
code.cmpltpd(b, a);
|
||||||
});
|
});
|
||||||
|
@ -856,7 +857,7 @@ void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
||||||
code.cmpleps(b, a);
|
code.cmpleps(b, a);
|
||||||
});
|
});
|
||||||
|
@ -870,7 +871,7 @@ void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
|
||||||
code.cmplepd(b, a);
|
code.cmplepd(b, a);
|
||||||
});
|
});
|
||||||
|
@ -891,7 +892,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
 
         if (code.HasHostFeature(HostFeature::AVX)) {

@@ -936,49 +937,51 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         return;
     }
 
-    EmitThreeOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b){
+    EmitThreeOpVectorOperation<fsize, DefaultIndexer>(
+        code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b) {
             const Xbyak::Xmm mask = xmm0;
             const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
 
             if (ctx.FPCR(fpcr_controlled).FZ()) {
                 const Xbyak::Xmm prev_xmm_b = xmm_b;
                 xmm_b = ctx.reg_alloc.ScratchXmm();
                 code.movaps(xmm_b, prev_xmm_b);
                 DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
             }
 
             // What we are doing here is handling the case when the inputs are differently signed zeros.
             // x86-64 treats differently signed zeros as equal while ARM does not.
             // Thus if we AND together things that x86-64 thinks are equal we'll get the positive zero.
 
            if (code.HasHostFeature(HostFeature::AVX)) {
                FCODE(vcmpeqp)(mask, result, xmm_b);
                if constexpr (is_max) {
                    FCODE(vandp)(eq, result, xmm_b);
                    FCODE(vmaxp)(result, result, xmm_b);
                } else {
                    FCODE(vorp)(eq, result, xmm_b);
                    FCODE(vminp)(result, result, xmm_b);
                }
                FCODE(blendvp)(result, eq);
            } else {
                code.movaps(mask, result);
                code.movaps(eq, result);
                FCODE(cmpneqp)(mask, xmm_b);
 
                if constexpr (is_max) {
                    code.andps(eq, xmm_b);
                    FCODE(maxp)(result, xmm_b);
                } else {
                    code.orps(eq, xmm_b);
                    FCODE(minp)(result, xmm_b);
                }
 
                code.andps(result, mask);
                code.andnps(mask, eq);
                code.orps(result, mask);
            }
-    }, CheckInputNaN::Yes);
+        },
+        CheckInputNaN::Yes);
 }
 
 void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {
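The comment kept in this hunk is the heart of the min/max emitter: x86-64's packed compare and min/max treat +0.0 and -0.0 as equal, while ARM's FMAX/FMIN must distinguish them. A minimal scalar sketch of the bit trick (illustrative only, not dynarmic's emitted code):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static std::uint32_t Bits(float f) {
        std::uint32_t u;
        std::memcpy(&u, &f, sizeof(u));  // inspect the object representation
        return u;
    }

    int main() {
        // For inputs x86 deems equal, max must return the operand with the
        // sign bit clear: AND of the representations clears it.
        assert((Bits(+0.0f) & Bits(-0.0f)) == Bits(+0.0f));
        // min must return the operand with the sign bit set: OR sets it.
        assert((Bits(+0.0f) | Bits(-0.0f)) == Bits(-0.0f));
    }

The emitted blendvp (or the andps/andnps/orps sequence on pre-AVX hosts) then substitutes this AND/OR result wherever the compare mask reported "equal", which only changes the outcome for the ±0 pair.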
@@ -1024,7 +1027,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
     const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
 
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         FCODE(vfmadd231p)(result, xmm_b, xmm_c);
     });

@@ -1044,7 +1047,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
     Xbyak::Label end, fallback;
 
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         code.movaps(result, xmm_a);
         FCODE(vfmadd231p)(result, xmm_b, xmm_c);

@@ -1113,7 +1116,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
     const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
     const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm();
 
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         FCODE(vcmpunordp)(xmm0, result, operand);
         FCODE(vxorp)(twos, result, operand);
         FCODE(mulp)(result, operand);

@@ -1151,8 +1154,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
                 result[elementi] = sign | FP::FPValue<FPT, false, 0, 2>();
             }
         }
-    }
-    );
+    });
 
     HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
 
@@ -1287,7 +1289,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
     const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
 
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
         FCODE(vfnmadd231p)(result, operand1, operand2);
     });

@@ -1307,7 +1309,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
 
     Xbyak::Label end, fallback;
 
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
         FCODE(vfnmadd231p)(result, operand1, operand2);

@@ -1386,7 +1388,7 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         }
     }();
 
-    EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
+    EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a) {
         FCODE(roundp)(result, xmm_a, round_imm);
     });
 
@@ -1399,8 +1401,7 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
         mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
         mp::lift_value<FP::RoundingMode::TowardsZero>,
-        mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>
-    >;
+        mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
 
     using exact_list = mp::list<std::true_type, std::false_type>;
 
     static const auto lut = Common::GenerateLookupTableFromList(

@@ -1416,12 +1417,9 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 for (size_t i = 0; i < output.size(); ++i) {
                     output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
                 }
-            }
-        )
-    };
+            })};
         },
-        mp::cartesian_product<rounding_list, exact_list>{}
-    );
+        mp::cartesian_product<rounding_list, exact_list>{});
 
     EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
 }
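GenerateLookupTableFromList instantiates one fallback function per element of the compile-time cartesian product and keys them for runtime lookup. A much-reduced sketch of the idiom with hypothetical names (dynarmic's mp:: machinery is more general than this):

    #include <map>
    #include <tuple>

    enum class Rounding { Nearest, Zero };

    template<Rounding R, bool Exact>
    int Fallback() {  // stands in for the per-instantiation fallback lambda
        return static_cast<int>(R) * 2 + (Exact ? 1 : 0);
    }

    static const std::map<std::tuple<Rounding, bool>, int (*)()> lut{
        {{Rounding::Nearest, false}, &Fallback<Rounding::Nearest, false>},
        {{Rounding::Nearest, true}, &Fallback<Rounding::Nearest, true>},
        {{Rounding::Zero, false}, &Fallback<Rounding::Zero, false>},
        {{Rounding::Zero, true}, &Fallback<Rounding::Zero, true>},
    };

    int Dispatch(Rounding r, bool exact) {
        return lut.at(std::make_tuple(r, exact))();  // mirrors lut.at(...) above
    }

The runtime (rounding, exact) pair selects which pre-instantiated template lands in EmitTwoOpFallback.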
@@ -1501,7 +1499,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
     const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
 
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
         FCODE(vfnmadd231p)(result, operand1, operand2);
         FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));

@@ -1523,12 +1521,12 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
 
     Xbyak::Label end, fallback;
 
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
         code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
         FCODE(vfnmadd231p)(result, operand1, operand2);
 
         // An explanation for this is given in EmitFPRSqrtStepFused.
-        code.vmovaps(mask, GetVectorOf<fsize, fsize == 32 ? 0x7f000000 : 0x7fe0000000000000>(code));
+        code.vmovaps(mask, GetVectorOf<fsize, (fsize == 32 ? 0x7f000000 : 0x7fe0000000000000)>(code));
         FCODE(vandp)(tmp, result, mask);
         ICODE(vpcmpeq)(tmp, tmp, mask);
         code.ptest(tmp, tmp);
@@ -1620,9 +1618,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
     const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
 
-    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
-
-        const int round_imm = [&]{
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
+        const int round_imm = [&] {
             switch (rounding) {
             case FP::RoundingMode::ToNearest_TieEven:
             default:

@@ -1659,8 +1656,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
         if (fbits != 0) {
-            const u64 scale_factor = fsize == 32
-                ? static_cast<u64>(fbits + 127) << 23
-                : static_cast<u64>(fbits + 1023) << 52;
+            const u64 scale_factor = fsize == 32
+                                         ? static_cast<u64>(fbits + 127) << 23
+                                         : static_cast<u64>(fbits + 1023) << 52;
             FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
         }
 
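The scale_factor constant is an IEEE-754 encoding trick: a float whose exponent field is fbits above the bias and whose mantissa is zero has the value 2^fbits, so a single packed multiply applies the fixed-point scaling. A small check of the 32-bit case (illustrative, outside the diff):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
        const std::uint64_t fbits = 4;
        const std::uint32_t encoding = static_cast<std::uint32_t>((fbits + 127) << 23);
        float scale;
        std::memcpy(&scale, &encoding, sizeof(scale));
        assert(scale == 16.0f);  // exponent 131 minus bias 127 gives 2^4
    }

The 64-bit case is identical in shape, with bias 1023 and a 52-bit mantissa field.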
@@ -1702,7 +1699,6 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             perform_conversion(src);
             FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
         }
-
     });
 
     ctx.reg_alloc.DefineValue(inst, src);

@@ -1716,8 +1712,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
         mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
         mp::lift_value<FP::RoundingMode::TowardsZero>,
-        mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>
-    >;
+        mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
 
     static const auto lut = Common::GenerateLookupTableFromList(
         [](auto arg) {

@@ -1732,12 +1727,9 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 for (size_t i = 0; i < output.size(); ++i) {
                     output[i] = static_cast<FPT>(FP::FPToFixed<FPT>(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr));
                 }
-            }
-        )
-    };
+            })};
         },
-        mp::cartesian_product<fbits_list, rounding_list>{}
-    );
+        mp::cartesian_product<fbits_list, rounding_list>{});
 
     EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
 }

@@ -1766,4 +1758,4 @@ void EmitX64::EmitFPVectorToUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPVectorToFixed<64, true>(code, ctx, inst);
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -131,7 +131,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     }
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 void EmitX64::EmitVectorSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::paddsb, &Xbyak::CodeGenerator::paddb, &Xbyak::CodeGenerator::psubb);

@@ -321,4 +321,4 @@ void EmitX64::EmitVectorUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst)
     ctx.reg_alloc.DefineValue(inst, tmp);
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
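EmitVectorSaturatedNative receives three opcodes: the saturating instruction that produces the result, plus a plain add/sub pair. A plausible reading (hedged; the helper's body is outside this diff) is that the unsaturated pair lets the emitter recompute the wrapping result and compare it with the saturated one, so the sticky FPSR.QC flag can be set when they differ. A scalar sketch of that idea:

    #include <cstdint>

    // Returns the saturated sum; qc reports whether saturation occurred.
    std::int8_t SaturatedAdd8(std::int8_t a, std::int8_t b, bool& qc) {
        const int wide = int{a} + int{b};
        const std::int8_t sat = wide > 127 ? 127
                              : wide < -128 ? -128
                                            : static_cast<std::int8_t>(wide);
        // The wrapped (unsaturated) result disagrees exactly when we clamped.
        qc = sat != static_cast<std::int8_t>(wide);
        return sat;
    }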
@@ -34,4 +34,4 @@ private:
     std::unique_ptr<Impl> impl;
 };
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64

@@ -25,4 +25,4 @@ void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
     // Do nothing
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -3,8 +3,6 @@
  * SPDX-License-Identifier: 0BSD
  */
 
-#include "dynarmic/backend/x64/exception_handler.h"
-
 #include <mach/mach.h>
 #include <mach/message.h>

@@ -18,6 +16,7 @@
 #include <fmt/format.h>
 
 #include "dynarmic/backend/x64/block_of_code.h"
+#include "dynarmic/backend/x64/exception_handler.h"
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/cast_util.h"
 #include "dynarmic/common/common_types.h"

@@ -36,7 +35,7 @@ struct CodeBlockInfo {
 
 struct MachMessage {
     mach_msg_header_t head;
-    char data[2048]; ///< Arbitrary size
+    char data[2048];  ///< Arbitrary size
 };
 
 class MachHandler final {

@@ -64,7 +63,7 @@ private:
 };
 
 MachHandler::MachHandler() {
 #define KCHECK(x) ASSERT_MSG((x) == KERN_SUCCESS, "dynarmic: macOS MachHandler: init failure at {}", #x)
 
     KCHECK(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server_port));
     KCHECK(mach_port_insert_right(mach_task_self(), server_port, server_port, MACH_MSG_TYPE_MAKE_SEND));

@@ -74,7 +73,7 @@ MachHandler::MachHandler() {
     mach_port_t prev;
     KCHECK(mach_port_request_notification(mach_task_self(), server_port, MACH_NOTIFY_PORT_DESTROYED, 0, server_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &prev));
 
 #undef KCHECK
 
     thread = std::thread(&MachHandler::MessagePump, this);
 }

@@ -102,7 +101,7 @@ void MachHandler::MessagePump() {
     }
 
     mr = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
-    if (mr != MACH_MSG_SUCCESS){
+    if (mr != MACH_MSG_SUCCESS) {
         fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to send mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
         return;
     }

@@ -146,7 +145,7 @@ void MachHandler::RemoveCodeBlock(u64 rip) {
 
 MachHandler mach_handler;
 
-} // anonymous namespace
+}  // anonymous namespace
 
 mig_external kern_return_t catch_mach_exception_raise(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t) {
     fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise\n");

@@ -161,14 +160,13 @@ mig_external kern_return_t catch_mach_exception_raise_state_identity(mach_port_t
 mig_external kern_return_t catch_mach_exception_raise_state(
     mach_port_t /*exception_port*/,
     exception_type_t exception,
-    const mach_exception_data_t /*code*/, // code[0] is as per kern_return.h, code[1] is rip.
+    const mach_exception_data_t /*code*/,  // code[0] is as per kern_return.h, code[1] is rip.
     mach_msg_type_number_t /*codeCnt*/,
     int* flavor,
     const thread_state_t old_state,
     mach_msg_type_number_t old_stateCnt,
     thread_state_t new_state,
-    mach_msg_type_number_t* new_stateCnt
-) {
+    mach_msg_type_number_t* new_stateCnt) {
     if (!flavor || !new_stateCnt) {
         fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Invalid arguments.\n");
         return KERN_INVALID_ARGUMENT;

@@ -191,9 +189,8 @@ mig_external kern_return_t catch_mach_exception_raise_state(
 
 struct ExceptionHandler::Impl final {
     Impl(BlockOfCode& code)
         : code_begin(Common::BitCast<u64>(code.getCode()))
-        , code_end(code_begin + code.GetTotalCodeSize())
-    {}
+        , code_end(code_begin + code.GetTotalCodeSize()) {}
 
     void SetCallback(std::function<FakeCall(u64)> cb) {
         CodeBlockInfo cbi;

@@ -227,4 +224,4 @@ void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
     impl->SetCallback(cb);
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -5,19 +5,20 @@
 
 #include "dynarmic/backend/x64/exception_handler.h"
 
+#ifdef __APPLE__
+# include <signal.h>
+# include <sys/ucontext.h>
+#else
+# include <signal.h>
+# include <ucontext.h>
+#endif
+
 #include <cstring>
 #include <functional>
 #include <memory>
 #include <mutex>
 #include <vector>
 
-#include <signal.h>
-#ifdef __APPLE__
-#include <sys/ucontext.h>
-#else
-#include <ucontext.h>
-#endif
-
 #include "dynarmic/backend/x64/block_of_code.h"
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/cast_util.h"

@@ -121,16 +122,16 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
     ASSERT(sig == SIGSEGV || sig == SIGBUS);
 
 #if defined(__APPLE__)
-#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rip)
-#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rsp)
+# define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rip)
+# define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rsp)
 #elif defined(__linux__)
-#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RIP])
-#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RSP])
+# define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RIP])
+# define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RSP])
 #elif defined(__FreeBSD__)
-#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.mc_rip)
-#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.mc_rsp)
+# define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.mc_rip)
+# define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.mc_rsp)
 #else
-#error "Unknown platform"
+# error "Unknown platform"
 #endif
 
     {
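The CTX_RIP/CTX_RSP macros hide the per-OS location of the saved instruction and stack pointers inside the ucontext. A minimal Linux/x86-64 sketch of the pattern (assumes a handler installed with SA_SIGINFO, as this file does; glibc may need _GNU_SOURCE for REG_RIP):

    #include <signal.h>
    #include <ucontext.h>

    static void Handler(int /*sig*/, siginfo_t* /*info*/, void* raw_context) {
        ucontext_t* ctx = static_cast<ucontext_t*>(raw_context);
        const greg_t fault_rip = ctx->uc_mcontext.gregs[REG_RIP];
        // Writing REG_RIP changes where the thread resumes once the handler
        // returns; the emitter uses this to redirect into patched code.
        ctx->uc_mcontext.gregs[REG_RIP] = fault_rip;
    }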
@@ -152,26 +153,25 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
 
     struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
     if (retry_sa->sa_flags & SA_SIGINFO) {
         retry_sa->sa_sigaction(sig, info, raw_context);
         return;
     }
     if (retry_sa->sa_handler == SIG_DFL) {
         signal(sig, SIG_DFL);
         return;
     }
     if (retry_sa->sa_handler == SIG_IGN) {
         return;
     }
     retry_sa->sa_handler(sig);
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 struct ExceptionHandler::Impl final {
     Impl(BlockOfCode& code)
         : code_begin(Common::BitCast<u64>(code.getCode()))
-        , code_end(code_begin + code.GetTotalCodeSize())
-    {}
+        , code_end(code_begin + code.GetTotalCodeSize()) {}
 
     void SetCallback(std::function<FakeCall(u64)> cb) {
         CodeBlockInfo cbi;

@@ -204,4 +204,4 @@ void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
     impl->SetCallback(cb);
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -3,12 +3,12 @@
  * SPDX-License-Identifier: 0BSD
  */
 
-#include <cstring>
-#include <vector>
-
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
+#include <cstring>
+#include <vector>
+
 #include "dynarmic/backend/x64/block_of_code.h"
 #include "dynarmic/backend/x64/exception_handler.h"
 #include "dynarmic/common/assert.h"

@@ -187,14 +187,13 @@ struct ExceptionHandler::Impl final {
         code.mov(code.ABI_PARAM1, Common::BitCast<u64>(&cb));
         code.mov(code.ABI_PARAM2, code.ABI_PARAM3);
         code.CallLambda(
-            [](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx){
+            [](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx) {
                 FakeCall fc = cb_(ctx->Rip);
 
                 ctx->Rsp -= sizeof(u64);
                 *Common::BitCast<u64*>(ctx->Rsp) = fc.ret_rip;
                 ctx->Rip = fc.call_rip;
-            }
-        );
+            });
         code.add(code.rsp, 8);
         code.mov(code.eax, static_cast<u32>(ExceptionContinueExecution));
         code.ret();

@@ -208,8 +207,8 @@ struct ExceptionHandler::Impl final {
         unwind_info->Flags = UNW_FLAG_EHANDLER;
         unwind_info->SizeOfProlog = prolog_info.prolog_size;
         unwind_info->CountOfCodes = static_cast<UBYTE>(prolog_info.number_of_unwind_code_entries);
-        unwind_info->FrameRegister = 0; // No frame register present
-        unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0
+        unwind_info->FrameRegister = 0;  // No frame register present
+        unwind_info->FrameOffset = 0;    // Unused because FrameRegister == 0
         // UNWIND_INFO::UnwindCode field:
         const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size();
         UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code.AllocateFromCodeSpace(size_of_unwind_code));

@@ -259,4 +258,4 @@ void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
     impl->SetCallback(cb);
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -3,15 +3,16 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/interface/exclusive_monitor.h"
+
 #include <algorithm>
 
 #include "dynarmic/common/assert.h"
-#include "dynarmic/interface/exclusive_monitor.h"
 
 namespace Dynarmic {
 
-ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) :
-        exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {
+ExclusiveMonitor::ExclusiveMonitor(size_t processor_count)
+        : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {
     Unlock();
 }

@@ -56,4 +57,4 @@ void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
     Unlock();
 }
 
-} // namespace Dynarmic
+}  // namespace Dynarmic
@@ -10,35 +10,35 @@
 namespace Dynarmic::Backend::X64 {
 
 enum class HostFeature : u64 {
     SSSE3 = 1ULL << 0,
     SSE41 = 1ULL << 1,
     SSE42 = 1ULL << 2,
     AVX = 1ULL << 3,
     AVX2 = 1ULL << 4,
     AVX512F = 1ULL << 5,
     AVX512CD = 1ULL << 6,
     AVX512VL = 1ULL << 7,
     AVX512BW = 1ULL << 8,
     AVX512DQ = 1ULL << 9,
     AVX512BITALG = 1ULL << 10,
     PCLMULQDQ = 1ULL << 11,
     F16C = 1ULL << 12,
     FMA = 1ULL << 13,
     AES = 1ULL << 14,
     POPCNT = 1ULL << 15,
     BMI1 = 1ULL << 16,
     BMI2 = 1ULL << 17,
     LZCNT = 1ULL << 18,
     GFNI = 1ULL << 19,
 
     // Zen-based BMI2
     FastBMI2 = 1ULL << 20,
 
     // Orthographic AVX512 features on 128 and 256 vectors
     AVX512_Ortho = AVX512F | AVX512VL,
 
     // Orthographic AVX512 features for both 32-bit and 64-bit floats
     AVX512_OrthoFloat = AVX512_Ortho | AVX512DQ,
 };
 
 constexpr HostFeature operator~(HostFeature f) {

@@ -61,4 +61,4 @@ constexpr HostFeature operator&=(HostFeature& result, HostFeature f) {
     return result = (result & f);
 }
 
-}
+}  // namespace Dynarmic::Backend::X64
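Since HostFeature is a bit set, composite entries like AVX512_Ortho are masks rather than single capabilities, so tests for them must check that every bit of the mask is present. A sketch using the operators this header defines (assumes the header's operator& alongside built-in enum equality):

    constexpr bool HasAllOf(HostFeature features, HostFeature mask) {
        return (features & mask) == mask;  // all mask bits must be set
    }
    // HasAllOf(detected, HostFeature::AVX512_Ortho) is true only when both
    // AVX512F and AVX512VL were detected on the host.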
@@ -3,10 +3,11 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/backend/x64/hostloc.h"
+
 #include <xbyak.h>
 
 #include "dynarmic/backend/x64/abi.h"
-#include "dynarmic/backend/x64/hostloc.h"
 #include "dynarmic/backend/x64/stack_layout.h"
 
 namespace Dynarmic::Backend::X64 {

@@ -21,4 +22,4 @@ Xbyak::Xmm HostLocToXmm(HostLoc loc) {
     return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -13,10 +13,44 @@ namespace Dynarmic::Backend::X64 {
 
 enum class HostLoc {
     // Ordering of the registers is intentional. See also: HostLocToX64.
-    RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
-    XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
-    CF, PF, AF, ZF, SF, OF,
+    RAX,
+    RCX,
+    RDX,
+    RBX,
+    RSP,
+    RBP,
+    RSI,
+    RDI,
+    R8,
+    R9,
+    R10,
+    R11,
+    R12,
+    R13,
+    R14,
+    R15,
+    XMM0,
+    XMM1,
+    XMM2,
+    XMM3,
+    XMM4,
+    XMM5,
+    XMM6,
+    XMM7,
+    XMM8,
+    XMM9,
+    XMM10,
+    XMM11,
+    XMM12,
+    XMM13,
+    XMM14,
+    XMM15,
+    CF,
+    PF,
+    AF,
+    ZF,
+    SF,
+    OF,
     FirstSpill,
 };

@@ -111,4 +145,4 @@ const HostLocList any_xmm = {
 Xbyak::Reg64 HostLocToReg64(HostLoc loc);
 Xbyak::Xmm HostLocToXmm(HostLoc loc);
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -10,18 +10,17 @@
 namespace Dynarmic::Backend::X64 {
 
 struct JitStateInfo {
-    template <typename JitStateType>
+    template<typename JitStateType>
     JitStateInfo(const JitStateType&)
         : offsetof_guest_MXCSR(offsetof(JitStateType, guest_MXCSR))
         , offsetof_asimd_MXCSR(offsetof(JitStateType, asimd_MXCSR))
         , offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
         , rsb_ptr_mask(JitStateType::RSBPtrMask)
         , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
        , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
        , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
-        , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
-    {}
+        , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) {}
 
     const size_t offsetof_guest_MXCSR;
     const size_t offsetof_asimd_MXCSR;

@@ -34,4 +33,4 @@ struct JitStateInfo {
     const size_t offsetof_fpsr_qc;
 };
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -5,8 +5,8 @@
 
 #pragma once
 
-#include "dynarmic/common/common_types.h"
 #include "dynarmic/common/bit_util.h"
+#include "dynarmic/common/common_types.h"
 
 namespace Dynarmic::Backend::X64::NZCV {

@@ -50,4 +50,4 @@ inline u32 FromX64(u32 x64_flags) {
     return ((x64_flags & x64_mask) * from_x64_multiplier) & arm_mask;
 }
 
-} // namespace Dynarmic::Backend::X64::NZCV
+}  // namespace Dynarmic::Backend::X64::NZCV
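FromX64's single multiply works because a constant with one set bit per flag shifts every masked flag into its target position at once; products that would collide are shifted past bit 31 and discarded. An illustrative constexpr check with made-up masks (not dynarmic's actual constants):

    #include <cstdint>

    // Gather x86 ZF (bit 6) and CF (bit 0) into bits 30 and 29.
    constexpr std::uint32_t Gather(std::uint32_t x64_flags) {
        constexpr std::uint32_t x64_mask = (1u << 6) | (1u << 0);
        constexpr std::uint32_t multiplier = (1u << 24) | (1u << 29);  // 6+24=30, 0+29=29
        constexpr std::uint32_t arm_mask = (1u << 30) | (1u << 29);
        return ((x64_flags & x64_mask) * multiplier) & arm_mask;  // stray products masked off
    }
    static_assert(Gather(1u << 6) == 1u << 30);
    static_assert(Gather((1u << 6) | (1u << 0)) == ((1u << 30) | (1u << 29)));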
@@ -12,9 +12,12 @@
 namespace Dynarmic::Backend::X64 {
 
 struct OpArg {
-    OpArg() : type(Type::Operand), inner_operand() {}
-    /* implicit */ OpArg(const Xbyak::Address& address) : type(Type::Address), inner_address(address) {}
-    /* implicit */ OpArg(const Xbyak::Reg& reg) : type(Type::Reg), inner_reg(reg) {}
+    OpArg()
+            : type(Type::Operand), inner_operand() {}
+    /* implicit */ OpArg(const Xbyak::Address& address)
+            : type(Type::Address), inner_address(address) {}
+    /* implicit */ OpArg(const Xbyak::Reg& reg)
+            : type(Type::Reg), inner_reg(reg) {}
 
     Xbyak::Operand& operator*() {
         switch (type) {

@@ -74,4 +77,4 @@ private:
     };
 };
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -3,22 +3,22 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/backend/x64/perf_map.h"
+
 #include <cstddef>
 #include <string>
 
-#include "dynarmic/backend/x64/perf_map.h"
-
 #ifdef __linux__
 
-#include <cstdio>
-#include <cstdlib>
-#include <mutex>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <fmt/format.h>
-
-#include "dynarmic/common/common_types.h"
+# include <cstdio>
+# include <cstdlib>
+# include <mutex>
+
+# include <fmt/format.h>
+# include <sys/types.h>
+# include <unistd.h>
+
+# include "dynarmic/common/common_types.h"
 
 namespace Dynarmic::Backend::X64 {

@@ -43,7 +43,7 @@ void OpenFile() {
 
     std::setvbuf(file, nullptr, _IONBF, 0);
 }
-} // anonymous namespace
+}  // anonymous namespace
 
 namespace detail {
 void PerfMapRegister(const void* start, const void* end, std::string_view friendly_name) {

@@ -64,7 +64,7 @@ void PerfMapRegister(const void* start, const void* end, std::string_view friend
     const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
     std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
 }
-} // namespace detail
+}  // namespace detail
 
 void PerfMapClear() {
     std::lock_guard guard{mutex};

@@ -78,7 +78,7 @@ void PerfMapClear() {
     OpenFile();
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
 
 #else

@@ -86,10 +86,10 @@ namespace Dynarmic::Backend::X64 {
 
 namespace detail {
 void PerfMapRegister(const void*, const void*, std::string_view) {}
-} // namespace detail
+}  // namespace detail
 
 void PerfMapClear() {}
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
 
 #endif
@@ -13,7 +13,7 @@ namespace Dynarmic::Backend::X64 {
 
 namespace detail {
 void PerfMapRegister(const void* start, const void* end, std::string_view friendly_name);
-} // namespace detail
+}  // namespace detail
 
 template<typename T>
 void PerfMapRegister(T start, const void* end, std::string_view friendly_name) {

@@ -22,4 +22,4 @@ void PerfMapRegister(T start, const void* end, std::string_view friendly_name) {
 
 void PerfMapClear();
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -3,6 +3,8 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/backend/x64/reg_alloc.h"
+
 #include <algorithm>
 #include <numeric>
 #include <utility>

@@ -11,19 +13,18 @@
 #include <xbyak.h>
 
 #include "dynarmic/backend/x64/abi.h"
-#include "dynarmic/backend/x64/reg_alloc.h"
 #include "dynarmic/backend/x64/stack_layout.h"
 #include "dynarmic/common/assert.h"
 
 namespace Dynarmic::Backend::X64 {
 
 #define MAYBE_AVX(OPCODE, ...) \
     [&] { \
         if (code.HasHostFeature(HostFeature::AVX)) { \
             code.v##OPCODE(__VA_ARGS__); \
         } else { \
             code.OPCODE(__VA_ARGS__); \
         } \
     }()
 
 static bool CanExchange(HostLoc a, HostLoc b) {
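MAYBE_AVX wraps one emitter call in an immediately invoked lambda so a single macro argument list serves both encodings. For example, MAYBE_AVX(addps, result, operand) expands to roughly the following (names here assume an xbyak code generator in scope, as in this file):

    [&] {
        if (code.HasHostFeature(HostFeature::AVX)) {
            code.vaddps(result, operand);  // VEX-encoded form
        } else {
            code.addps(result, operand);  // legacy SSE form
        }
    }()

xbyak accepts the two-operand vaddps spelling, duplicating the destination as the first source, which is what makes sharing one __VA_ARGS__ list between the two forms possible.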
@@ -57,7 +58,7 @@ static size_t GetBitWidth(IR::Type type) {
     case IR::Type::U128:
         return 128;
     case IR::Type::NZCVFlags:
-        return 32; // TODO: Update to 16 when flags optimization is done
+        return 32;  // TODO: Update to 16 when flags optimization is done
     }
     UNREACHABLE();
 }

@@ -225,11 +226,10 @@ bool Argument::IsInMemory() const {
 }
 
 RegAlloc::RegAlloc(BlockOfCode& code, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order)
         : gpr_order(gpr_order)
         , xmm_order(xmm_order)
         , hostloc_info(NonSpillHostLocCount + SpillCount)
-        , code(code)
-{}
+        , code(code) {}
 
 RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
     ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};

@@ -382,13 +382,14 @@ HostLoc RegAlloc::ScratchImpl(const std::vector<HostLoc>& desired_locations) {
     return location;
 }
 
-void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0,
+void RegAlloc::HostCall(IR::Inst* result_def,
+                        std::optional<Argument::copyable_reference> arg0,
                         std::optional<Argument::copyable_reference> arg1,
                         std::optional<Argument::copyable_reference> arg2,
                         std::optional<Argument::copyable_reference> arg3) {
     constexpr size_t args_count = 4;
-    constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4 };
-    const std::array<std::optional<Argument::copyable_reference>, args_count> args = { arg0, arg1, arg2, arg3 };
+    constexpr std::array<HostLoc, args_count> args_hostloc = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4};
+    const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3};
 
     static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
         std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());

@@ -420,7 +421,7 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_r
             code.movzx(reg.cvt32(), reg.cvt16());
             break;
         default:
-            break; // Nothing needs to be done
+            break;  // Nothing needs to be done
         }
 #endif
     }

@@ -717,4 +718,4 @@ Xbyak::Address RegAlloc::SpillToOpArg(HostLoc loc) {
     return xword[rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0])];
 }
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -85,7 +85,8 @@ public:
 
 private:
     friend class RegAlloc;
-    explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
+    explicit Argument(RegAlloc& reg_alloc)
+            : reg_alloc(reg_alloc) {}
 
     bool allocated = false;
     RegAlloc& reg_alloc;

@@ -170,4 +171,4 @@ private:
     Xbyak::Address SpillToOpArg(HostLoc loc);
 };
 
-} // namespace Dynarmic::Backend::X64
+}  // namespace Dynarmic::Backend::X64
@@ -14,8 +14,8 @@ namespace Dynarmic::Backend::X64 {
 constexpr size_t SpillCount = 64;
 
 #ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4324) // Structure was padded due to alignment specifier
+# pragma warning(push)
+# pragma warning(disable : 4324)  // Structure was padded due to alignment specifier
 #endif
 
 struct alignas(16) StackLayout {

@@ -31,9 +31,9 @@ struct alignas(16) StackLayout {
 };
 
 #ifdef _MSC_VER
-#pragma warning(pop)
+# pragma warning(pop)
 #endif
 
 static_assert(sizeof(StackLayout) % 16 == 0);
 
 }  // namespace Dynarmic::Backend::X64
@@ -3,13 +3,13 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/common/assert.h"
+
 #include <cstdio>
 #include <exception>
 
 #include <fmt/format.h>
 
-#include "dynarmic/common/assert.h"
-
 namespace Dynarmic::Common {
 
 [[noreturn]] void Terminate(fmt::string_view msg, fmt::format_args args) {

@@ -18,4 +18,4 @@ namespace Dynarmic::Common {
     std::terminate();
 }
 
-} // namespace Dynarmic::Common
+}  // namespace Dynarmic::Common
@@ -15,57 +15,57 @@ namespace Dynarmic::Common {
 
 namespace detail {
 
-template <typename... Ts>
+template<typename... Ts>
 [[noreturn]] void TerminateHelper(fmt::string_view msg, Ts... args) {
     Terminate(msg, fmt::make_format_args(args...));
 }
 
 }  // namespace detail
 
 }  // namespace Dynarmic::Common
 
 #if defined(__clang) || defined(__GNUC__)
-#define ASSUME(expr) [&]{ if (!(expr)) __builtin_unreachable(); }()
+# define ASSUME(expr) [&] { if (!(expr)) __builtin_unreachable(); }()
 #elif defined(_MSC_VER)
-#define ASSUME(expr) __assume(expr)
+# define ASSUME(expr) __assume(expr)
 #else
-#define ASSUME(expr)
+# define ASSUME(expr)
 #endif
 
 #ifdef DYNARMIC_IGNORE_ASSERTS
-#if defined(__clang) || defined(__GNUC__)
-#define UNREACHABLE() __builtin_unreachable()
-#elif defined(_MSC_VER)
-#define UNREACHABLE() __assume(0)
-#else
-#define UNREACHABLE()
-#endif
+# if defined(__clang) || defined(__GNUC__)
+# define UNREACHABLE() __builtin_unreachable()
+# elif defined(_MSC_VER)
+# define UNREACHABLE() __assume(0)
+# else
+# define UNREACHABLE()
+# endif
 
-#define ASSERT(expr) ASSUME(expr)
-#define ASSERT_MSG(expr, ...) ASSUME(expr)
-#define ASSERT_FALSE(...) UNREACHABLE()
+# define ASSERT(expr) ASSUME(expr)
+# define ASSERT_MSG(expr, ...) ASSUME(expr)
+# define ASSERT_FALSE(...) UNREACHABLE()
 #else
-#define UNREACHABLE() ASSERT_FALSE("Unreachable code!")
+# define UNREACHABLE() ASSERT_FALSE("Unreachable code!")
 
-#define ASSERT(expr) \
-    [&]{ \
+# define ASSERT(expr) \
+    [&] { \
         if (UNLIKELY(!(expr))) { \
             ::Dynarmic::Common::detail::TerminateHelper(#expr); \
         } \
     }()
-#define ASSERT_MSG(expr, ...) \
-    [&]{ \
+# define ASSERT_MSG(expr, ...) \
+    [&] { \
        if (UNLIKELY(!(expr))) { \
            ::Dynarmic::Common::detail::TerminateHelper(#expr "\nMessage: " __VA_ARGS__); \
        } \
    }()
-#define ASSERT_FALSE(...) ::Dynarmic::Common::detail::TerminateHelper("false\nMessage: " __VA_ARGS__)
+# define ASSERT_FALSE(...) ::Dynarmic::Common::detail::TerminateHelper("false\nMessage: " __VA_ARGS__)
 #endif
 
 #if defined(NDEBUG) || defined(DYNARMIC_IGNORE_ASSERTS)
-#define DEBUG_ASSERT(expr) ASSUME(expr)
-#define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr)
+# define DEBUG_ASSERT(expr) ASSUME(expr)
+# define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr)
 #else
-#define DEBUG_ASSERT(expr) ASSERT(expr)
-#define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__)
+# define DEBUG_ASSERT(expr) ASSERT(expr)
+# define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__)
 #endif
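Usage-wise the two configurations of these macros behave very differently: in normal builds ASSERT_MSG formats its arguments and terminates on failure, while under DYNARMIC_IGNORE_ASSERTS the same expression degrades to an ASSUME optimizer hint and the message arguments are never evaluated. A small sketch (hypothetical function, assumes this header):

    #include <cstddef>

    std::size_t HalveEven(std::size_t n) {
        ASSERT_MSG(n % 2 == 0, "n must be even, got {}", n);
        // With asserts ignored, the compiler may still optimize under the
        // assumption that n is even, via __builtin_unreachable()/__assume.
        return n / 2;
    }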
@@ -21,7 +21,7 @@ constexpr size_t BitSize() {
     return sizeof(T) * CHAR_BIT;
 }
 
-template <typename T>
+template<typename T>
 constexpr T Ones(size_t count) {
     ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
     if (count == BitSize<T>())

@@ -72,8 +72,8 @@ constexpr T ModifyBits(const T value, const T new_bits) {
 }
 
 #ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4554)
+# pragma warning(push)
+# pragma warning(disable : 4554)
 #endif
 /// Extracts a single bit at bit_position from value of type T.
 template<typename T>

@@ -123,7 +123,7 @@ constexpr T ModifyBit(const T value, bool new_bit) {
     return ModifyBit<T>(bit_position, value, new_bit);
 }
 #ifdef _MSC_VER
-#pragma warning(pop)
+# pragma warning(pop)
 #endif
 
 /// Sign-extends a value that has bit_count bits to the full bitwidth of type T.

@@ -152,12 +152,12 @@ inline T SignExtend(const size_t bit_count, const T value) {
     return value;
 }
 
-template <typename Integral>
+template<typename Integral>
 inline size_t BitCount(Integral value) {
     return std::bitset<BitSize<Integral>()>(value).count();
 }
 
-template <typename T>
+template<typename T>
 constexpr size_t CountLeadingZeros(T value) {
     auto x = static_cast<std::make_unsigned_t<T>>(value);
     size_t result = BitSize<T>();

@@ -168,7 +168,7 @@ constexpr size_t CountLeadingZeros(T value) {
     return result;
 }
 
-template <typename T>
+template<typename T>
 constexpr int HighestSetBit(T value) {
     auto x = static_cast<std::make_unsigned_t<T>>(value);
     int result = -1;

@@ -179,7 +179,7 @@ constexpr int HighestSetBit(T value) {
     return result;
 }
 
-template <typename T>
+template<typename T>
 constexpr size_t LowestSetBit(T value) {
     auto x = static_cast<std::make_unsigned_t<T>>(value);
     if (x == 0)

@@ -193,12 +193,12 @@ constexpr size_t LowestSetBit(T value) {
     return result;
 }
 
-template <typename T>
+template<typename T>
 constexpr bool MostSignificantBit(T value) {
     return Bit<BitSize<T>() - 1, T>(value);
 }
 
-template <typename T>
+template<typename T>
 inline T Replicate(T value, size_t element_size) {
     ASSERT_MSG(BitSize<T>() % element_size == 0, "bitsize of T not divisible by element_size");
     if (element_size == BitSize<T>())
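Replicate doubles the element until it fills the type, so the recursion depth is logarithmic in the bit width. A quick worked trace (assuming this header's Replicate and u32):

    // Replicate(u32{0xAB}, 8) == 0xABABABAB:
    //   0xAB -> 0xABAB   (element grows from 8 to 16 bits)
    //        -> 0xABABABAB (16 to 32 bits; element_size now equals BitSize<u32>())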
@ -206,7 +206,7 @@ inline T Replicate(T value, size_t element_size) {
|
||||||
return Replicate(value | (value << element_size), element_size * 2);
|
return Replicate(value | (value << element_size), element_size * 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template<typename T>
|
||||||
constexpr T RotateRight(T value, size_t amount) {
|
constexpr T RotateRight(T value, size_t amount) {
|
||||||
amount %= BitSize<T>();
|
amount %= BitSize<T>();
|
||||||
|
|
||||||
|
@ -219,8 +219,8 @@ constexpr T RotateRight(T value, size_t amount) {
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u32 SwapHalves32(u32 value) {
|
constexpr u32 SwapHalves32(u32 value) {
|
||||||
return ((value & 0xFFFF0000U) >> 16) |
|
return ((value & 0xFFFF0000U) >> 16)
|
||||||
((value & 0x0000FFFFU) << 16);
|
| ((value & 0x0000FFFFU) << 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u16 SwapBytes16(u16 value) {
|
constexpr u16 SwapBytes16(u16 value) {
|
||||||
|
@ -228,21 +228,21 @@ constexpr u16 SwapBytes16(u16 value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u32 SwapBytes32(u32 value) {
|
constexpr u32 SwapBytes32(u32 value) {
|
||||||
return ((value & 0xFF000000U) >> 24) |
|
return ((value & 0xFF000000U) >> 24)
|
||||||
((value & 0x00FF0000U) >> 8) |
|
| ((value & 0x00FF0000U) >> 8)
|
||||||
((value & 0x0000FF00U) << 8) |
|
| ((value & 0x0000FF00U) << 8)
|
||||||
((value & 0x000000FFU) << 24);
|
| ((value & 0x000000FFU) << 24);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u64 SwapBytes64(u64 value) {
|
constexpr u64 SwapBytes64(u64 value) {
|
||||||
return ((value & 0xFF00000000000000ULL) >> 56) |
|
return ((value & 0xFF00000000000000ULL) >> 56)
|
||||||
((value & 0x00FF000000000000ULL) >> 40) |
|
| ((value & 0x00FF000000000000ULL) >> 40)
|
||||||
((value & 0x0000FF0000000000ULL) >> 24) |
|
| ((value & 0x0000FF0000000000ULL) >> 24)
|
||||||
((value & 0x000000FF00000000ULL) >> 8) |
|
| ((value & 0x000000FF00000000ULL) >> 8)
|
||||||
((value & 0x00000000FF000000ULL) << 8) |
|
| ((value & 0x00000000FF000000ULL) << 8)
|
||||||
((value & 0x0000000000FF0000ULL) << 24) |
|
| ((value & 0x0000000000FF0000ULL) << 24)
|
||||||
((value & 0x000000000000FF00ULL) << 40) |
|
| ((value & 0x000000000000FF00ULL) << 40)
|
||||||
((value & 0x00000000000000FFULL) << 56);
|
| ((value & 0x00000000000000FFULL) << 56);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Dynarmic::Common
|
} // namespace Dynarmic::Common
|
||||||
|
|
|
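The SwapHalves32 and SwapBytes hunks above are representative of the new wrapping rule for binary expressions: when a return expression wraps, each continuation line now leads with its operator instead of trailing with it. A minimal, self-contained sketch of the resulting style (illustrative name, standard types rather than the project's u32 alias):

#include <cstdint>

// Sketch of the post-format wrapping: the '|' leads each continuation line.
constexpr std::uint32_t SwapHalves32Sketch(std::uint32_t value) {
    return ((value & 0xFFFF0000U) >> 16)
         | ((value & 0x0000FFFFU) << 16);
}

static_assert(SwapHalves32Sketch(0x12345678U) == 0x56781234U);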
@@ -13,7 +13,7 @@
 namespace Dynarmic::Common {
 
 /// Reinterpret objects of one type as another by bit-casting between object representations.
-template <class Dest, class Source>
+template<class Dest, class Source>
 inline Dest BitCast(const Source& source) noexcept {
 static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal");
 static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
@@ -26,7 +26,7 @@ inline Dest BitCast(const Source& source) noexcept {
 
 /// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations.
 /// Note that here we do not verify if source has enough bytes to read from.
-template <class Dest, class SourcePtr>
+template<class Dest, class SourcePtr>
 inline Dest BitCastPointee(const SourcePtr source) noexcept {
 static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer");
 static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
@@ -37,9 +37,9 @@ inline Dest BitCastPointee(const SourcePtr source) noexcept {
 }
 
 /// Cast a lambda into an equivalent function pointer.
-template <class Function>
+template<class Function>
 inline auto FptrCast(Function f) noexcept {
 return static_cast<mp::equivalent_function_type<Function>*>(f);
 }
 
 } // namespace Dynarmic::Common
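The bodies of BitCast and BitCastPointee fall outside the hunk context above. For orientation, a trivially-copyable bit-cast helper of this shape is conventionally implemented with std::memcpy; the sketch below is illustrative and uses a hypothetical name, not the file's actual body:

#include <cstring>
#include <type_traits>

// Illustrative implementation of the trivially-copyable bit-cast pattern;
// std::memcpy between object representations is the well-defined route
// prior to C++20's std::bit_cast.
template<class Dest, class Source>
inline Dest BitCastSketch(const Source& source) noexcept {
    static_assert(sizeof(Dest) == sizeof(Source));
    static_assert(std::is_trivially_copyable_v<Dest>);
    static_assert(std::is_trivially_copyable_v<Source>);

    Dest dest;
    std::memcpy(&dest, &source, sizeof(dest));
    return dest;
}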
@@ -3,56 +3,55 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/crypto/aes.h"
+
 #include <array>
 
 #include "dynarmic/common/common_types.h"
-#include "dynarmic/common/crypto/aes.h"
 
 namespace Dynarmic::Common::Crypto::AES {
 
 using SubstitutionTable = std::array<u8, 256>;
 
 // See section 5.1.1 Figure 7 in FIPS 197
-constexpr SubstitutionTable substitution_box{{
-// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+constexpr SubstitutionTable substitution_box{
+{// 0 1 2 3 4 5 6 7 8 9 A B C D E F
 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
-0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
-}};
+0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16}};
 
 // See section 5.3.2 Figure 14 in FIPS 197
-constexpr SubstitutionTable inverse_substitution_box{{
-// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+constexpr SubstitutionTable inverse_substitution_box{
+{// 0 1 2 3 4 5 6 7 8 9 A B C D E F
 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
-0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
-}};
+0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D}};
 
 // See section 4.2.1 in FIPS 197.
 static constexpr u8 xtime(u8 x) {
@@ -61,11 +60,11 @@ static constexpr u8 xtime(u8 x) {
 
 // Galois Field multiplication.
 static constexpr u8 Multiply(u8 x, u8 y) {
-return static_cast<u8>(((y & 1) * x) ^
-((y >> 1 & 1) * xtime(x)) ^
-((y >> 2 & 1) * xtime(xtime(x))) ^
-((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^
-((y >> 4 & 1) * xtime(xtime(xtime(xtime(x))))));
+return static_cast<u8>(((y & 1) * x)
+^ ((y >> 1 & 1) * xtime(x))
+^ ((y >> 2 & 1) * xtime(xtime(x)))
+^ ((y >> 3 & 1) * xtime(xtime(xtime(x))))
+^ ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x))))));
 }
 
 static void ShiftRows(State& out_state, const State& state) {
@@ -178,4 +177,4 @@ void InverseMixColumns(State& out_state, const State& state) {
 }
 }
 
 } // namespace Dynarmic::Common::Crypto::AES
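Most of xtime's body is outside the hunk context; as background, xtime in FIPS 197 (section 4.2.1) multiplies by x in GF(2^8) and reduces by the AES polynomial, which comes down to a left shift and a conditional XOR with 0x1B. A sketch consistent with the Multiply chain shown above (hypothetical name; standard types instead of u8):

#include <cstdint>

// Multiplication by x in GF(2^8), reduced by the AES polynomial x^8+x^4+x^3+x+1.
// If the top bit is set, the shifted value is XORed with 0x1B.
static constexpr std::uint8_t XtimeSketch(std::uint8_t x) {
    return static_cast<std::uint8_t>((x << 1) ^ ((x >> 7) * 0x1B));
}

static_assert(XtimeSketch(0x57) == 0xAE);  // worked example from FIPS 197
static_assert(XtimeSketch(0xAE) == 0x47);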
@@ -6,6 +6,7 @@
 #pragma once
 
 #include <array>
+
 #include "dynarmic/common/common_types.h"
 
 namespace Dynarmic::Common::Crypto::AES {
@@ -19,4 +20,4 @@ void EncryptSingleRound(State& out_state, const State& state);
 void MixColumns(State& out_state, const State& state);
 void InverseMixColumns(State& out_state, const State& state);
 
 } // namespace Dynarmic::Common::Crypto::AES
@@ -3,150 +3,149 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/crypto/crc32.h"
+
 #include <array>
 
 #include "dynarmic/common/common_types.h"
-#include "dynarmic/common/crypto/crc32.h"
 
 namespace Dynarmic::Common::Crypto::CRC32 {
 
 using CRC32Table = std::array<u32, 256>;
 
 // CRC32 algorithm that uses polynomial 0x1EDC6F41
-constexpr CRC32Table castagnoli_table{{
-0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+constexpr CRC32Table castagnoli_table{
+{0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
-0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
-}};
+0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351}};
 
 // CRC32 algorithm that uses polynomial 0x04C11DB7
-constexpr CRC32Table iso_table{{
-0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
+constexpr CRC32Table iso_table{
+{0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
-0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
-}};
+0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D}};
 
 static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) {
 const auto* data = reinterpret_cast<const unsigned char*>(&value);
@@ -166,4 +165,4 @@ u32 ComputeCRC32ISO(u32 crc, u64 value, int length) {
 return ComputeCRC32(iso_table, crc, value, length);
 }
 
 } // namespace Dynarmic::Common::Crypto::CRC32
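The loop of ComputeCRC32 is likewise elided by the hunk context. The standard byte-at-a-time, reflected, table-driven update that a 256-entry table like the two above supports is sketched here (illustrative name and signature, not the file's own):

#include <cstdint>

// Byte-at-a-time, reflected, table-driven CRC32 update.
// 'table' stands in for one of the 256-entry tables above.
inline std::uint32_t Crc32UpdateSketch(const std::uint32_t (&table)[256],
                                       std::uint32_t crc,
                                       const unsigned char* data,
                                       int length) {
    while (length-- > 0) {
        // Low byte of the running CRC selects the table entry.
        crc = table[(crc ^ *data++) & 0xFF] ^ (crc >> 8);
    }
    return crc;
}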
@@ -37,4 +37,4 @@ u32 ComputeCRC32Castagnoli(u32 crc, u64 value, int length);
 */
 u32 ComputeCRC32ISO(u32 crc, u64 value, int length);
 
 } // namespace Dynarmic::Common::Crypto::CRC32
@@ -3,52 +3,52 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/crypto/sm4.h"
+
 #include <array>
 
 #include "dynarmic/common/common_types.h"
-#include "dynarmic/common/crypto/sm4.h"
 
 namespace Dynarmic::Common::Crypto::SM4 {
 
 using SubstitutionTable = std::array<u8, 256>;
 
-constexpr SubstitutionTable substitution_box{{
-0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7,
+constexpr SubstitutionTable substitution_box{
+{0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7,
 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3,
 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A,
 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
 0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA,
 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
 0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B,
 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
 0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2,
 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
 0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52,
 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
 0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5,
 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55,
 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
 0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F,
 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
 0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F,
 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
 0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD,
 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
 0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E,
 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20,
-0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
-}};
+0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48}};
 
 u8 AccessSubstitutionBox(u8 index) {
 return substitution_box[index];
 }
 
 } // namespace Dynarmic::Common::Crypto::SM4
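Since AccessSubstitutionBox is a plain table lookup, the first row of the table above pins down its first outputs: index 0x00 maps to 0xD6 and 0x01 to 0x90. An illustrative spot check (the helper function name is hypothetical):

#include "dynarmic/common/crypto/sm4.h"

// First two entries of the S-box, read off the table above.
inline bool Sm4SboxSpotCheckSketch() {
    using Dynarmic::Common::Crypto::SM4::AccessSubstitutionBox;
    return AccessSubstitutionBox(0x00) == 0xD6
        && AccessSubstitutionBox(0x01) == 0x90;
}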
@@ -11,4 +11,4 @@ namespace Dynarmic::Common::Crypto::SM4 {
 
 u8 AccessSubstitutionBox(u8 index);
 
 } // namespace Dynarmic::Common::Crypto::SM4
@@ -22,7 +22,8 @@ public:
 FPCR() = default;
 FPCR(const FPCR&) = default;
 FPCR(FPCR&&) = default;
-explicit FPCR(u32 data) : value{data & mask} {}
+explicit FPCR(u32 data)
+: value{data & mask} {}
 
 FPCR& operator=(const FPCR&) = default;
 FPCR& operator=(FPCR&&) = default;
@@ -204,4 +205,4 @@ inline bool operator!=(FPCR lhs, FPCR rhs) {
 return !operator==(lhs, rhs);
 }
 
 } // namespace Dynarmic::FP
@@ -18,7 +18,8 @@ public:
 FPSR() = default;
 FPSR(const FPSR&) = default;
 FPSR(FPSR&&) = default;
-explicit FPSR(u32 data) : value{data & mask} {}
+explicit FPSR(u32 data)
+: value{data & mask} {}
 
 FPSR& operator=(const FPSR&) = default;
 FPSR& operator=(FPSR&&) = default;
@@ -156,4 +157,4 @@ inline bool operator!=(FPSR lhs, FPSR rhs) {
 return !operator==(lhs, rhs);
 }
 
 } // namespace Dynarmic::FP
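The FPCR and FPSR hunks apply the same mechanical change: a single-line constructor with a member initializer is split so the initializer list starts on its own, more deeply indented line. A standalone sketch of the resulting shape, with an illustrative class name and a made-up mask value:

#include <cstdint>

// Illustrative class; the mask value is invented for the sketch.
class StatusRegisterSketch {
public:
    explicit StatusRegisterSketch(std::uint32_t data)
            : value{data & mask} {}

private:
    static constexpr std::uint32_t mask = 0xF800009F;
    std::uint32_t value = 0;
};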
@@ -4,6 +4,7 @@
 */
 
 #include "dynarmic/common/fp/fused.h"
+
 #include "dynarmic/common/fp/mantissa_util.h"
 #include "dynarmic/common/fp/unpacked.h"
 #include "dynarmic/common/u128.h"
@@ -20,7 +21,7 @@ static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa)
 
 FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
 const bool product_sign = op1.sign != op2.sign;
-const auto [product_exponent, product_value] = [op1, op2]{
+const auto [product_exponent, product_value] = [op1, op2] {
 int exponent = op1.exponent + op2.exponent;
 u128 value = Multiply64To128(op1.mantissa, op2.mantissa);
 if (value.Bit<product_point_position + 1>()) {
@@ -86,4 +87,4 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
 return ReduceMantissa(result_sign, result_exponent, result);
 }
 
 } // namespace Dynarmic::FP
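The only formatting change to FusedMulAdd here is a space before the brace of an immediately-invoked lambda. The pattern itself, initializing a structured binding from a lambda invoked in place, looks like this sketch (all names hypothetical):

#include <tuple>

// Immediately-invoked lambda initializing a structured binding,
// written with the post-format "] {" spacing.
inline int CombinedExponentSketch(int exponent1, int exponent2) {
    const auto [exponent, overflowed] = [exponent1, exponent2] {
        const int sum = exponent1 + exponent2;
        return std::make_tuple(sum, false);
    }();
    return overflowed ? exponent + 1 : exponent;
}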
@@ -12,4 +12,4 @@ struct FPUnpacked;
 /// This function assumes all arguments have been normalized.
 FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2);
 
 } // namespace Dynarmic::FP
@@ -135,4 +135,4 @@ constexpr FPT FPValue() {
 return FPT(FPInfo<FPT>::Zero(sign) | mantissa | (biased_exponent << FPInfo<FPT>::explicit_mantissa_width));
 }
 
 } // namespace Dynarmic::FP
@@ -43,4 +43,4 @@ inline ResidualError ResidualErrorOnRightShift(u64 mantissa, int shift_amount) {
 return ResidualError::GreaterThanHalf;
 }
 
 } // namespace Dynarmic::FP
@@ -8,10 +8,10 @@
 #include "dynarmic/common/fp/op/FPCompare.h"
 #include "dynarmic/common/fp/op/FPConvert.h"
 #include "dynarmic/common/fp/op/FPMulAdd.h"
+#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
+#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
 #include "dynarmic/common/fp/op/FPRecipEstimate.h"
 #include "dynarmic/common/fp/op/FPRecipExponent.h"
 #include "dynarmic/common/fp/op/FPRecipStepFused.h"
 #include "dynarmic/common/fp/op/FPRoundInt.h"
-#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
-#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
 #include "dynarmic/common/fp/op/FPToFixed.h"
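The reordering above, with FPRSqrtEstimate.h and FPRSqrtStepFused.h moving ahead of the FPRecip* headers, follows from case-sensitive ASCII ordering of include paths: uppercase 'S' (0x53) sorts before lowercase 'e' (0x65). A compile-time check of that comparison:

#include <string_view>

// Case-sensitive comparison: 'S' (0x53) < 'e' (0x65).
static_assert(std::string_view{"FPRSqrtEstimate.h"} < std::string_view{"FPRecipEstimate.h"});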
@@ -3,15 +3,16 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/fp/op/FPCompare.h"
+
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
-#include "dynarmic/common/fp/op/FPCompare.h"
 #include "dynarmic/common/fp/process_exception.h"
 #include "dynarmic/common/fp/unpacked.h"
 
 namespace Dynarmic::FP {
 
-template <typename FPT>
+template<typename FPT>
 bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
 const auto unpacked1 = FPUnpack(lhs, fpcr, fpsr);
 const auto unpacked2 = FPUnpack(rhs, fpcr, fpsr);
@@ -20,8 +21,7 @@ bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
 const auto& value1 = std::get<FPUnpacked>(unpacked1);
 const auto& value2 = std::get<FPUnpacked>(unpacked2);
 
-if (type1 == FPType::QNaN || type1 == FPType::SNaN ||
-type2 == FPType::QNaN || type2 == FPType::SNaN) {
+if (type1 == FPType::QNaN || type1 == FPType::SNaN || type2 == FPType::QNaN || type2 == FPType::SNaN) {
 if (type1 == FPType::SNaN || type2 == FPType::SNaN) {
 FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
 }
@@ -37,4 +37,4 @@ template bool FPCompareEQ<u16>(u16 lhs, u16 rhs, FPCR fpcr, FPSR& fpsr);
 template bool FPCompareEQ<u32>(u32 lhs, u32 rhs, FPCR fpcr, FPSR& fpsr);
 template bool FPCompareEQ<u64>(u64 lhs, u64 rhs, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -10,7 +10,7 @@ namespace Dynarmic::FP {
 class FPCR;
 class FPSR;
 
-template <typename FPT>
+template<typename FPT>
 bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -3,17 +3,18 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/fp/op/FPConvert.h"
+
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/info.h"
-#include "dynarmic/common/fp/op/FPConvert.h"
 #include "dynarmic/common/fp/process_exception.h"
 #include "dynarmic/common/fp/unpacked.h"
 
 namespace Dynarmic::FP {
 namespace {
-template <typename FPT_TO, typename FPT_FROM>
+template<typename FPT_TO, typename FPT_FROM>
 FPT_TO FPConvertNaN(FPT_FROM op) {
 const bool sign = Common::Bit<Common::BitSize<FPT_FROM>() - 1>(op);
 const u64 frac = [op] {
@@ -38,9 +39,9 @@ FPT_TO FPConvertNaN(FPT_FROM op) {
 return FPT_TO(shifted_sign | exponent << 9 | Common::Bits<42, 50>(frac));
 }
 }
-}
+} // Anonymous namespace
 
-template <typename FPT_TO, typename FPT_FROM>
+template<typename FPT_TO, typename FPT_FROM>
 FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr) {
 const auto [type, sign, value] = FPUnpackCV<FPT_FROM>(op, fpcr, fpsr);
 const bool is_althp = Common::BitSize<FPT_TO>() == 16 && fpcr.AHP();
@@ -86,4 +87,4 @@ template u32 FPConvert<u32, u64>(u64 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
 template u64 FPConvert<u64, u16>(u16 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
 template u64 FPConvert<u64, u32>(u32 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -11,7 +11,7 @@ class FPCR;
 class FPSR;
 enum class RoundingMode;
 
-template <typename FPT_TO, typename FPT_FROM>
+template<typename FPT_TO, typename FPT_FROM>
 FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -3,12 +3,13 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/fp/op/FPMulAdd.h"
+
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
-#include "dynarmic/common/fp/info.h"
 #include "dynarmic/common/fp/fused.h"
-#include "dynarmic/common/fp/op/FPMulAdd.h"
+#include "dynarmic/common/fp/info.h"
 #include "dynarmic/common/fp/process_exception.h"
 #include "dynarmic/common/fp/process_nan.h"
 #include "dynarmic/common/fp/unpacked.h"
@@ -76,4 +77,4 @@ template u16 FPMulAdd<u16>(u16 addend, u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
 template u32 FPMulAdd<u32>(u32 addend, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
 template u64 FPMulAdd<u64>(u64 addend, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -13,4 +13,4 @@ class FPSR;
 template<typename FPT>
 FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -14,4 +14,4 @@ constexpr FPT FPNeg(FPT op) {
 return op ^ FPInfo<FPT>::sign_mask;
 }
 
 } // namespace Dynarmic::FP
@@ -3,11 +3,12 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
+
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/info.h"
-#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
 #include "dynarmic/common/fp/process_exception.h"
 #include "dynarmic/common/fp/process_nan.h"
 #include "dynarmic/common/fp/unpacked.h"
@@ -54,4 +55,4 @@ template u16 FPRSqrtEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
 template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
 template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -13,4 +13,4 @@ class FPSR;
 template<typename FPT>
 FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -3,12 +3,13 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
+
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/fused.h"
 #include "dynarmic/common/fp/info.h"
 #include "dynarmic/common/fp/op/FPNeg.h"
-#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
 #include "dynarmic/common/fp/process_nan.h"
 #include "dynarmic/common/fp/unpacked.h"
 
@@ -53,4 +54,4 @@ template u16 FPRSqrtStepFused<u16>(u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
 template u32 FPRSqrtStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
 template u64 FPRSqrtStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -13,4 +13,4 @@ class FPSR;
 template<typename FPT>
 FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -3,6 +3,8 @@
 * SPDX-License-Identifier: 0BSD
 */
 
+#include "dynarmic/common/fp/op/FPRecipEstimate.h"
+
 #include <tuple>
 
 #include "dynarmic/common/assert.h"
@@ -10,7 +12,6 @@
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/info.h"
-#include "dynarmic/common/fp/op/FPRecipEstimate.h"
 #include "dynarmic/common/fp/process_exception.h"
 #include "dynarmic/common/fp/process_nan.h"
 #include "dynarmic/common/fp/unpacked.h"
@@ -39,7 +40,7 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
 }
 
 if (value.exponent < FPInfo<FPT>::exponent_min - 2) {
-const bool overflow_to_inf = [&]{
+const bool overflow_to_inf = [&] {
 switch (fpcr.RMode()) {
 case RoundingMode::ToNearest_TieEven:
 return true;
@@ -95,4 +96,4 @@ template u16 FPRecipEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
 template u32 FPRecipEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
 template u64 FPRecipEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -13,4 +13,4 @@ class FPSR;
 template<typename FPT>
 FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -3,18 +3,19 @@
 * SPDX-License-Identifier: 0BSD
 */
 
-#include "dynarmic/common/common_types.h"
+#include "dynarmic/common/fp/op/FPRecipExponent.h"
 
 #include "dynarmic/common/bit_util.h"
+#include "dynarmic/common/common_types.h"
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/info.h"
-#include "dynarmic/common/fp/op/FPRecipExponent.h"
 #include "dynarmic/common/fp/process_nan.h"
 #include "dynarmic/common/fp/unpacked.h"
 
 namespace Dynarmic::FP {
 namespace {
-template <typename FPT>
+template<typename FPT>
 FPT DetermineExponentValue(size_t value) {
 if constexpr (sizeof(FPT) == sizeof(u32)) {
 return static_cast<FPT>(Common::Bits<23, 30>(value));
@@ -24,9 +25,9 @@ FPT DetermineExponentValue(size_t value) {
 return static_cast<FPT>(Common::Bits<10, 14>(value));
 }
 }
 } // Anonymous namespace
 
-template <typename FPT>
+template<typename FPT>
 FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
 const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
 (void)value;
@@ -54,4 +55,4 @@ template u16 FPRecipExponent<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
 template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
 template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
 
 } // namespace Dynarmic::FP
@@ -10,7 +10,7 @@ namespace Dynarmic::FP {
 class FPCR;
 class FPSR;
 
-template <typename FPT>
+template<typename FPT>
 FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
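Note: the template-declaration hunks here and above are a single character wide: the space between the template keyword and its parameter list is removed, consistent with SpaceAfterTemplateKeyword: false (an assumption about the full configuration, which is truncated at the top of this page). Sketch:

    // before:  template <typename T>
    // after:
    template<typename T>
    T Twice(T value) {
        return value + value;
    }

    int main() {
        return Twice(0);
    }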
@@ -3,12 +3,13 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/common/fp/op/FPRecipStepFused.h"
+
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/fused.h"
 #include "dynarmic/common/fp/info.h"
 #include "dynarmic/common/fp/op/FPNeg.h"
-#include "dynarmic/common/fp/op/FPRecipStepFused.h"
 #include "dynarmic/common/fp/process_nan.h"
 #include "dynarmic/common/fp/unpacked.h"
 
@@ -52,4 +53,4 @@ template u16 FPRecipStepFused<u16>(u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
 template u32 FPRecipStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
 template u64 FPRecipStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -13,4 +13,4 @@ class FPSR;
 template<typename FPT>
 FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -3,6 +3,8 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/common/fp/op/FPRoundInt.h"
+
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/bit_util.h"
 #include "dynarmic/common/common_types.h"
@@ -10,7 +12,6 @@
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/info.h"
 #include "dynarmic/common/fp/mantissa_util.h"
-#include "dynarmic/common/fp/op/FPRoundInt.h"
 #include "dynarmic/common/fp/process_exception.h"
 #include "dynarmic/common/fp/process_nan.h"
 #include "dynarmic/common/fp/rounding_mode.h"
@@ -78,8 +79,8 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
     const u64 abs_int_result = new_sign ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result);
 
     const FPT result = int_result == 0
-                           ? FPInfo<FPT>::Zero(sign)
-                           : FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
+                         ? FPInfo<FPT>::Zero(sign)
+                         : FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
 
     if (error != ResidualError::Zero && exact) {
         FPProcessException(FPExc::Inexact, fpcr, fpsr);
@@ -92,4 +93,4 @@ template u64 FPRoundInt<u16>(u16 op, FPCR fpcr, RoundingMode rounding, bool exac
 template u64 FPRoundInt<u32>(u32 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);
 template u64 FPRoundInt<u64>(u64 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -16,4 +16,4 @@ enum class RoundingMode;
 template<typename FPT>
 u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -3,13 +3,14 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/common/fp/op/FPToFixed.h"
+
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/bit_util.h"
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/mantissa_util.h"
-#include "dynarmic/common/fp/op/FPToFixed.h"
 #include "dynarmic/common/fp/process_exception.h"
 #include "dynarmic/common/fp/rounding_mode.h"
 #include "dynarmic/common/fp/unpacked.h"
@@ -98,4 +99,4 @@ template u64 FPToFixed<u16>(size_t ibits, u16 op, size_t fbits, bool unsigned_,
 template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -16,4 +16,4 @@ enum class RoundingMode;
 template<typename FPT>
 u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -3,10 +3,11 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/common/fp/process_exception.h"
+
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/fpsr.h"
-#include "dynarmic/common/fp/process_exception.h"
 
 namespace Dynarmic::FP {
 
@@ -54,4 +55,4 @@ void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr) {
     }
 }
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -21,4 +21,4 @@ enum class FPExc {
 
 void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -3,6 +3,8 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/common/fp/process_nan.h"
+
 #include <optional>
 
 #include "dynarmic/common/assert.h"
@@ -11,7 +13,6 @@
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/info.h"
 #include "dynarmic/common/fp/process_exception.h"
-#include "dynarmic/common/fp/process_nan.h"
 #include "dynarmic/common/fp/unpacked.h"
 
 namespace Dynarmic::FP {
@@ -88,4 +89,4 @@ template std::optional<u16> FPProcessNaNs3<u16>(FPType type1, FPType type2, FPTy
 template std::optional<u32> FPProcessNaNs3<u32>(FPType type1, FPType type2, FPType type3, u32 op1, u32 op2, u32 op3, FPCR fpcr, FPSR& fpsr);
 template std::optional<u64> FPProcessNaNs3<u64>(FPType type1, FPType type2, FPType type3, u64 op1, u64 op2, u64 op3, FPCR fpcr, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -22,4 +22,4 @@ std::optional<FPT> FPProcessNaNs(FPType type1, FPType type2, FPT op1, FPT op2, F
 template<typename FPT>
 std::optional<FPT> FPProcessNaNs3(FPType type1, FPType type2, FPType type3, FPT op1, FPT op2, FPT op3, FPCR fpcr, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -24,4 +24,4 @@ enum class RoundingMode {
     ToOdd,
 };
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -3,12 +3,13 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/common/fp/unpacked.h"
+
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/common/fp/info.h"
 #include "dynarmic/common/fp/mantissa_util.h"
 #include "dynarmic/common/fp/process_exception.h"
 #include "dynarmic/common/fp/rounding_mode.h"
-#include "dynarmic/common/fp/unpacked.h"
 #include "dynarmic/common/safe_ops.h"
 
 namespace Dynarmic::FP {
@@ -143,12 +144,12 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
 
     FPT result = 0;
 #ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4127) // C4127: conditional expression is constant
+#    pragma warning(push)
+#    pragma warning(disable : 4127)  // C4127: conditional expression is constant
 #endif
     if (!isFP16 || !fpcr.AHP()) {
 #ifdef _MSC_VER
-#pragma warning(pop)
+#    pragma warning(pop)
 #endif
         constexpr int max_biased_exp = (1 << E) - 1;
         if (biased_exp >= max_biased_exp) {
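Note: in the hunk above, preprocessor directives nested inside an #ifdef block gain indentation after the hash; this is the effect of IndentPPDirectives: AfterHash (the indent width shown is an assumption, since the diff view collapses runs of spaces). A compilable sketch:

    // pp_sketch.cpp: with AfterHash, the '#' stays in column 0 and the
    // directive keyword is indented one level per nesting depth.
    #ifdef _MSC_VER
    #    pragma warning(push)
    #    pragma warning(disable : 4127)  // C4127: conditional expression is constant
    #endif

    int main() {
        return 0;
    }

    #ifdef _MSC_VER
    #    pragma warning(pop)
    #endif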
@@ -188,4 +189,4 @@ template u16 FPRoundBase<u16>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, F
 template u32 FPRoundBase<u32>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u64 FPRoundBase<u64>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -85,4 +85,4 @@ FPT FPRound(FPUnpacked op, FPCR fpcr, FPSR& fpsr) {
     return FPRound<FPT>(op, fpcr, fpcr.RMode(), fpsr);
 }
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -96,4 +96,4 @@ constexpr std::optional<FPT> ProcessNaNs(FPT a, FPT b, FPT c) {
     return std::nullopt;
 }
 
-} // namespace Dynarmic::FP
+}  // namespace Dynarmic::FP
@@ -14,10 +14,12 @@
 
 namespace Dynarmic::Common {
 
-template <typename T> class IntrusiveList;
-template <typename T> class IntrusiveListIterator;
+template<typename T>
+class IntrusiveList;
+template<typename T>
+class IntrusiveListIterator;
 
-template <typename T>
+template<typename T>
 class IntrusiveListNode {
 public:
     bool IsSentinel() const {
@@ -34,9 +36,8 @@ protected:
     friend class IntrusiveListIterator<const T>;
 };
 
-template <typename T>
-class IntrusiveListSentinel final : public IntrusiveListNode<T>
-{
+template<typename T>
+class IntrusiveListSentinel final : public IntrusiveListNode<T> {
     using IntrusiveListNode<T>::next;
     using IntrusiveListNode<T>::prev;
     using IntrusiveListNode<T>::is_sentinel;
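Note: the hunk above attaches the class's opening brace to the declaration line instead of leaving it on a line of its own, matching the Attach-style brace wrapping configured for classes (BraceWrapping with AfterClass: false). Sketch:

    // before:
    //     class Sentinel final : public Node
    //     {
    // after:
    class Node {};

    class Sentinel final : public Node {
    public:
        bool IsSentinel() const { return true; }
    };

    int main() {
        return Sentinel{}.IsSentinel() ? 0 : 1;
    }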
@@ -49,33 +50,36 @@ public:
     }
 };
 
-template <typename T>
+template<typename T>
 class IntrusiveListIterator {
 public:
     using iterator_category = std::bidirectional_iterator_tag;
     using difference_type = std::ptrdiff_t;
     using value_type = T;
     using pointer = value_type*;
     using const_pointer = const value_type*;
     using reference = value_type&;
     using const_reference = const value_type&;
 
     // If value_type is const, we want "const IntrusiveListNode<value_type>", not "const IntrusiveListNode<const value_type>"
     using node_type = std::conditional_t<std::is_const<value_type>::value,
                                          const IntrusiveListNode<std::remove_const_t<value_type>>,
                                          IntrusiveListNode<value_type>>;
     using node_pointer = node_type*;
     using node_reference = node_type&;
 
     IntrusiveListIterator() = default;
     IntrusiveListIterator(const IntrusiveListIterator& other) = default;
     IntrusiveListIterator& operator=(const IntrusiveListIterator& other) = default;
 
-    explicit IntrusiveListIterator(node_pointer list_node) : node(list_node) {
+    explicit IntrusiveListIterator(node_pointer list_node)
+            : node(list_node) {
     }
-    explicit IntrusiveListIterator(pointer data) : node(data) {
+    explicit IntrusiveListIterator(pointer data)
+            : node(data) {
     }
-    explicit IntrusiveListIterator(reference data) : node(&data) {
+    explicit IntrusiveListIterator(reference data)
+            : node(&data) {
     }
 
     IntrusiveListIterator& operator++() {
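Note: each constructor in the hunk above is rewritten so its member initializer starts on its own line, with the colon leading it and an eight-column indent; this matches BreakConstructorInitializers: BeforeComma together with ConstructorInitializerIndentWidth: 8 from the configuration. Sketch:

    // before:  explicit Holder(int* data) : ptr(data) {}
    class Holder {
    public:
        explicit Holder(int* data)
                : ptr(data) {
        }

    private:
        int* ptr;
    };

    int main() {
        int x = 0;
        Holder holder(&x);
        (void)holder;
        return 0;
    }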
@@ -121,19 +125,19 @@ private:
     node_pointer node = nullptr;
 };
 
-template <typename T>
+template<typename T>
 class IntrusiveList {
 public:
     using difference_type = std::ptrdiff_t;
     using size_type = std::size_t;
     using value_type = T;
     using pointer = value_type*;
     using const_pointer = const value_type*;
     using reference = value_type&;
     using const_reference = const value_type&;
     using iterator = IntrusiveListIterator<value_type>;
     using const_iterator = IntrusiveListIterator<const value_type>;
     using reverse_iterator = std::reverse_iterator<iterator>;
     using const_reverse_iterator = std::reverse_iterator<const_iterator>;
 
     /**
@@ -222,10 +226,10 @@
 
         node->prev->next = node->next;
         node->next->prev = node->prev;
 #if !defined(NDEBUG)
         node->next = nullptr;
         node->prev = nullptr;
 #endif
 
         return node;
     }
@@ -308,21 +312,21 @@
     }
 
     // Iterator interface
     iterator begin() { return iterator(root->next); }
     const_iterator begin() const { return const_iterator(root->next); }
     const_iterator cbegin() const { return begin(); }
 
     iterator end() { return iterator(root.get()); }
     const_iterator end() const { return const_iterator(root.get()); }
     const_iterator cend() const { return end(); }
 
     reverse_iterator rbegin() { return reverse_iterator(end()); }
     const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
     const_reverse_iterator crbegin() const { return rbegin(); }
 
     reverse_iterator rend() { return reverse_iterator(begin()); }
     const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
     const_reverse_iterator crend() const { return rend(); }
 
     /**
      * Erases a node from the list, indicated by an iterator.
@@ -367,9 +371,9 @@ private:
  * @param lhs The first list.
  * @param rhs The second list.
  */
-template <typename T>
+template<typename T>
 void swap(IntrusiveList<T>& lhs, IntrusiveList<T>& rhs) noexcept {
     lhs.swap(rhs);
 }
 
-} // namespace Dynarmic::Common
+}  // namespace Dynarmic::Common
@@ -25,11 +25,11 @@ struct ReverseAdapter {
     }
 };
 
-} // namespace detail
+}  // namespace detail
 
 template<typename T>
 constexpr detail::ReverseAdapter<T> Reverse(T&& iterable) {
     return detail::ReverseAdapter<T>{iterable};
 }
 
-} // namespace Dynarmic::Common
+}  // namespace Dynarmic::Common
@@ -8,8 +8,8 @@
 #include <fmt/format.h>
 
 #ifdef DYNARMIC_USE_LLVM
-#include <llvm-c/Disassembler.h>
-#include <llvm-c/Target.h>
+#    include <llvm-c/Disassembler.h>
+#    include <llvm-c/Target.h>
 #endif
 
 #include "dynarmic/common/assert.h"
@@ -79,8 +79,10 @@ std::string DisassembleAArch32([[maybe_unused]] bool is_thumb, [[maybe_unused]]
         result += inst_size > 0 ? buffer : "<invalid instruction>";
         result += '\n';
 
-        if (inst_size == 0) inst_size = is_thumb ? 2 : 4;
-        if (length <= inst_size) break;
+        if (inst_size == 0)
+            inst_size = is_thumb ? 2 : 4;
+        if (length <= inst_size)
+            break;
 
         pc += inst_size;
         instructions += inst_size;
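Note: the hunk above splits two single-line if statements so the controlled statement sits on its own line, as AllowShortIfStatementsOnASingleLine: Never requires. Sketch:

    // before:  if (inst_size == 0) inst_size = is_thumb ? 2 : 4;
    int NormalizeInstSize(int inst_size, bool is_thumb) {
        if (inst_size == 0)
            inst_size = is_thumb ? 2 : 4;
        return inst_size;
    }

    int main() {
        return NormalizeInstSize(0, true) == 2 ? 0 : 1;
    }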
@@ -118,4 +120,4 @@ std::string DisassembleAArch64([[maybe_unused]] u32 instruction, [[maybe_unused]
     return result;
 }
 
-} // namespace Dynarmic::Common
+}  // namespace Dynarmic::Common
Some files were not shown because too many files have changed in this diff.