A64: Add unsafe_optimizations option
* Strength reduce FMA unsafely
This commit is contained in:
parent
82868034d3
commit
761e95eec0
5 changed files with 35 additions and 0 deletions
|
@ -138,6 +138,9 @@ struct UserConfig {
|
|||
return (f & optimizations) != no_optimizations;
|
||||
}
|
||||
|
||||
/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
|
||||
bool unsafe_optimizations = false;
|
||||
|
||||
/// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
|
||||
/// data cache instruction is executed. Notably DC ZVA will not implicitly do anything.
|
||||
/// When set to false, UserCallbacks::DataCacheOperationRaised will never be called.
|
||||
|
|
|
@ -29,6 +29,8 @@ struct A64EmitContext final : public EmitContext {
|
|||
bool IsSingleStep() const;
|
||||
FP::FPCR FPCR(bool fpcr_controlled = true) const override;
|
||||
|
||||
/// Reports the user's `unsafe_optimizations` setting from the A64 configuration.
bool UnsafeOptimizations() const override {
    return conf.unsafe_optimizations;
}
|
||||
|
||||
const A64::UserConfig& conf;
|
||||
};
|
||||
|
||||
|
|
|
@ -51,6 +51,8 @@ struct EmitContext {
|
|||
|
||||
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
|
||||
|
||||
/// Whether emitters may use unsafe optimizations.
/// The base implementation always answers false; derived contexts may override it.
virtual bool UnsafeOptimizations() const {
    return false;
}
|
||||
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Block& block;
|
||||
};
|
||||
|
|
|
@ -637,6 +637,20 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.UnsafeOptimizations()) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
|
||||
|
||||
FCODE(muls)(operand2, operand3);
|
||||
FCODE(adds)(operand1, operand2);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, operand1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
|
|
@ -1021,6 +1021,20 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.UnsafeOptimizations()) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
|
||||
|
||||
FCODE(mulp)(operand2, operand3);
|
||||
FCODE(addp)(operand1, operand2);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, operand1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
EmitFourOpFallback(code, ctx, inst, fallback_fn);
|
||||
|
|
Loading…
Reference in a new issue