Add intrinsics support (#121)

* Initial intrinsics support

* Update tests to work with the new Vector128 type and intrinsics

* Drop SSE4.1 requirement

* Fix copy-paste mistake
This commit is contained in:
gdkchan 2018-05-11 20:10:27 -03:00 committed by GitHub
parent 428360c5ac
commit 1aa96453ef
14 changed files with 1233 additions and 713 deletions

View file

@ -1,6 +1,12 @@
using System.Runtime.Intrinsics.X86;
/// <summary>
/// Process-wide switches read by the translator when it emits code.
/// </summary>
public static class AOptimizations
{
    public static bool DisableMemoryChecks = false;

    public static bool GenerateCallStack = true;

    // Opt-in for the SSE2 code paths; only honoured when the CPU supports SSE2.
    public static bool UseSse2IfAvailable = true;

    // Evaluated on every read. As a field this was computed in the same type initializer that
    // sets UseSse2IfAvailable to true, so assigning UseSse2IfAvailable afterwards had no effect.
    internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
}

View file

@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp2.0</TargetFramework>
<TargetFramework>netcoreapp2.1</TargetFramework>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
@ -12,4 +12,8 @@
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
</ItemGroup>
</Project>

View file

@ -4,6 +4,7 @@ using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -41,7 +42,14 @@ namespace ChocolArm64.Instruction
// Vector integer add: calls Sse2.Add when SSE2 is enabled, otherwise emits the IL `add`
// fallback. The operation is emitted exactly once (no unconditional emit before the branch).
public static void Add_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2Call(Context, nameof(Sse2.Add));
    }
    else
    {
        EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add));
    }
}
public static void Addhn_V(AILEmitterCtx Context)
@ -158,7 +166,7 @@ namespace ChocolArm64.Instruction
Context.Emit(OpCodes.Conv_U1);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.CountSetBits8));
Context.Emit(OpCodes.Conv_U8);
@ -303,12 +311,26 @@ namespace ChocolArm64.Instruction
// Scalar floating-point add: uses the AddScalar intrinsic when SSE2 is enabled, otherwise the
// IL `add` fallback. Emitted once only.
public static void Fadd_S(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2CallF(Context, nameof(Sse2.AddScalar));
    }
    else
    {
        EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
    }
}
// Vector floating-point add: uses the Add intrinsic when SSE2 is enabled, otherwise the
// IL `add` fallback. Emitted once only.
public static void Fadd_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2CallF(Context, nameof(Sse2.Add));
    }
    else
    {
        EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
    }
}
public static void Faddp_V(AILEmitterCtx Context)
@ -345,12 +367,26 @@ namespace ChocolArm64.Instruction
// Scalar floating-point divide: uses the DivideScalar intrinsic when SSE2 is enabled, otherwise
// the IL `div` fallback. Emitted once only.
public static void Fdiv_S(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2CallF(Context, nameof(Sse2.DivideScalar));
    }
    else
    {
        EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
    }
}
// Vector floating-point divide: uses the Divide intrinsic when SSE2 is enabled, otherwise the
// IL `div` fallback. Emitted once only.
public static void Fdiv_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2CallF(Context, nameof(Sse2.Divide));
    }
    else
    {
        EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
    }
}
public static void Fmadd_S(AILEmitterCtx Context)
@ -370,11 +406,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
}
else
{
@ -391,11 +427,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
}
else
{
@ -412,11 +448,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
}
else
{
@ -435,11 +471,11 @@ namespace ChocolArm64.Instruction
{
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
}
else if (SizeF == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
}
else
{
@ -505,7 +541,14 @@ namespace ChocolArm64.Instruction
// Scalar floating-point multiply: uses the MultiplyScalar intrinsic when SSE2 is enabled,
// otherwise the IL `mul` fallback. Emitted once only.
public static void Fmul_S(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2CallF(Context, nameof(Sse2.MultiplyScalar));
    }
    else
    {
        EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
    }
}
public static void Fmul_Se(AILEmitterCtx Context)
@ -515,7 +558,14 @@ namespace ChocolArm64.Instruction
// Vector floating-point multiply: uses the Multiply intrinsic when SSE2 is enabled, otherwise
// the IL `mul` fallback. Emitted once only.
public static void Fmul_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2CallF(Context, nameof(Sse2.Multiply));
    }
    else
    {
        EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
    }
}
public static void Fmul_Ve(AILEmitterCtx Context)
@ -716,11 +766,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -743,11 +793,11 @@ namespace ChocolArm64.Instruction
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (SizeF == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -819,11 +869,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -844,11 +894,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -947,12 +997,26 @@ namespace ChocolArm64.Instruction
// Scalar floating-point subtract: uses the SubtractScalar intrinsic when SSE2 is enabled,
// otherwise the IL `sub` fallback. Emitted once only.
public static void Fsub_S(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2CallF(Context, nameof(Sse2.SubtractScalar));
    }
    else
    {
        EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
    }
}
// Vector floating-point subtract: uses the Subtract intrinsic when SSE2 is enabled, otherwise
// the IL `sub` fallback. Emitted once only.
public static void Fsub_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2CallF(Context, nameof(Sse2.Subtract));
    }
    else
    {
        EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
    }
}
public static void Mla_V(AILEmitterCtx Context)
@ -1066,7 +1130,14 @@ namespace ChocolArm64.Instruction
// Vector integer subtract: calls Sse2.Subtract when SSE2 is enabled, otherwise emits the IL
// `sub` fallback. Emitted once only.
public static void Sub_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2Call(Context, nameof(Sse2.Subtract));
    }
    else
    {
        EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub));
    }
}
public static void Subhn_V(AILEmitterCtx Context)

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitAluHelper;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -13,17 +14,38 @@ namespace ChocolArm64.Instruction
{
// Vector integer compare-equal.
// The SSE2 path is taken only for the register form with 8/16/32-bit elements:
// Sse2.CompareEqual has no Vector128<long> overload (64-bit equality needs SSE4.1), so a
// reflection lookup for Size == 3 would find no method. Everything else uses EmitVectorCmp.
public static void Cmeq_V(AILEmitterCtx Context)
{
    bool UseIntrinsic = AOptimizations.UseSse2
        && Context.CurrOp is AOpCodeSimdReg
        && ((AOpCodeSimd)Context.CurrOp).Size < 3;

    if (UseIntrinsic)
    {
        EmitSse2Call(Context, nameof(Sse2.CompareEqual));
    }
    else
    {
        EmitVectorCmp(Context, OpCodes.Beq_S);
    }
}
// Vector integer compare greater-than-or-equal.
// Always uses the IL fallback: Sse2.CompareGreaterThanOrEqual is only defined for
// Vector128<double> (and Sse's for Vector128<float>), so there is no integer overload for
// EmitSse2Call to resolve.
public static void Cmge_V(AILEmitterCtx Context)
{
    EmitVectorCmp(Context, OpCodes.Bge_S);
}
// Vector integer compare greater-than.
// The SSE2 path is taken only for the register form with 8/16/32-bit elements:
// Sse2.CompareGreaterThan has no Vector128<long> overload (that is SSE4.2), so a reflection
// lookup for Size == 3 would find no method. Everything else uses EmitVectorCmp.
public static void Cmgt_V(AILEmitterCtx Context)
{
    bool UseIntrinsic = AOptimizations.UseSse2
        && Context.CurrOp is AOpCodeSimdReg
        && ((AOpCodeSimd)Context.CurrOp).Size < 3;

    if (UseIntrinsic)
    {
        EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan));
    }
    else
    {
        EmitVectorCmp(Context, OpCodes.Bgt_S);
    }
}
public static void Cmhi_V(AILEmitterCtx Context)
@ -112,32 +134,74 @@ namespace ChocolArm64.Instruction
// Scalar floating-point compare-equal: the register form uses the CompareEqualScalar intrinsic
// when SSE2 is enabled; any other form falls back to EmitScalarFcmp. Emitted once only.
public static void Fcmeq_S(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
    {
        EmitSse2CallF(Context, nameof(Sse2.CompareEqualScalar));
    }
    else
    {
        EmitScalarFcmp(Context, OpCodes.Beq_S);
    }
}
// Vector floating-point compare-equal: the register form uses the CompareEqual intrinsic when
// SSE2 is enabled; any other form falls back to EmitVectorFcmp. Emitted once only.
public static void Fcmeq_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
    {
        EmitSse2CallF(Context, nameof(Sse2.CompareEqual));
    }
    else
    {
        EmitVectorFcmp(Context, OpCodes.Beq_S);
    }
}
// Scalar floating-point compare greater-than-or-equal: the register form uses the
// CompareGreaterThanOrEqualScalar intrinsic when SSE2 is enabled; any other form falls back to
// EmitScalarFcmp. Emitted once only.
public static void Fcmge_S(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
    {
        EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqualScalar));
    }
    else
    {
        EmitScalarFcmp(Context, OpCodes.Bge_S);
    }
}
// Vector floating-point compare greater-than-or-equal: the register form uses the
// CompareGreaterThanOrEqual intrinsic when SSE2 is enabled; any other form falls back to
// EmitVectorFcmp. Emitted once only.
public static void Fcmge_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
    {
        EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqual));
    }
    else
    {
        EmitVectorFcmp(Context, OpCodes.Bge_S);
    }
}
// Scalar floating-point compare greater-than: the register form uses the
// CompareGreaterThanScalar intrinsic when SSE2 is enabled; any other form falls back to
// EmitScalarFcmp. Emitted once only.
public static void Fcmgt_S(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
    {
        EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanScalar));
    }
    else
    {
        EmitScalarFcmp(Context, OpCodes.Bgt_S);
    }
}
// Vector floating-point compare greater-than: the register form uses the CompareGreaterThan
// intrinsic when SSE2 is enabled; any other form falls back to EmitVectorFcmp. Emitted once only.
public static void Fcmgt_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
    {
        EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThan));
    }
    else
    {
        EmitVectorFcmp(Context, OpCodes.Bgt_S);
    }
}
public static void Fcmle_S(AILEmitterCtx Context)

View file

@ -382,15 +382,15 @@ namespace ChocolArm64.Instruction
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.SatF32ToS32)
: nameof(ASoftFallback.SatF32ToU32));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.SatF32ToS32)
: nameof(AVectorHelper.SatF32ToU32));
}
else /* if (SizeF == 1) */
{
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.SatF64ToS64)
: nameof(ASoftFallback.SatF64ToU64));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.SatF64ToS64)
: nameof(AVectorHelper.SatF64ToU64));
}
if (SizeF == 0)
@ -420,22 +420,22 @@ namespace ChocolArm64.Instruction
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS32));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS32));
}
}
else
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS64));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS64));
}
}
}
@ -453,22 +453,22 @@ namespace ChocolArm64.Instruction
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU32));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU32));
}
}
else
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU64));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU64));
}
}
}

View file

@ -3,6 +3,8 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instruction
{
@ -32,6 +34,129 @@ namespace ChocolArm64.Instruction
return (8 << (Op.Size + 1)) - Op.Imm;
}
/// <summary>
/// Emits a call to the <see cref="Sse2"/> method called <paramref name="Name"/> on the current
/// opcode's vector operands, treated as integer lanes, and stores the result in Rd.
/// </summary>
/// <remarks>
/// Op.Size selects the lane type (0 = sbyte, 1 = short, 2 = int, 3 = long). Each operand is
/// loaded with EmitLdvec and passed through the matching AVectorHelper.VectorSingleTo* helper;
/// the result goes through the inverse helper before EmitStvec. When the opcode is an
/// AOpCodeSimdReg the (Rn, Rm) two-operand overload is used, otherwise the one-operand overload.
/// For a SIMD64 register size the upper half of Rd is zeroed afterwards.
/// </remarks>
/// <param name="Context">Emitter context; its CurrOp must be an AOpCodeSimd.</param>
/// <param name="Name">Name of the Sse2 method to call.</param>
/// <exception cref="InvalidOperationException">
/// Op.Size is outside 0..3, or Sse2 has no overload of <paramref name="Name"/> for the lane type.
/// </exception>
public static void EmitSse2Call(AILEmitterCtx Context, string Name)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    Type   BaseType;
    string FromSingle;
    string ToSingle;

    switch (Op.Size)
    {
        case 0:
            BaseType   = typeof(Vector128<sbyte>);
            FromSingle = nameof(AVectorHelper.VectorSingleToSByte);
            ToSingle   = nameof(AVectorHelper.VectorSByteToSingle);
            break;

        case 1:
            BaseType   = typeof(Vector128<short>);
            FromSingle = nameof(AVectorHelper.VectorSingleToInt16);
            ToSingle   = nameof(AVectorHelper.VectorInt16ToSingle);
            break;

        case 2:
            BaseType   = typeof(Vector128<int>);
            FromSingle = nameof(AVectorHelper.VectorSingleToInt32);
            ToSingle   = nameof(AVectorHelper.VectorInt32ToSingle);
            break;

        case 3:
            BaseType   = typeof(Vector128<long>);
            FromSingle = nameof(AVectorHelper.VectorSingleToInt64);
            ToSingle   = nameof(AVectorHelper.VectorInt64ToSingle);
            break;

        default:
            // Previously this fell through with a null type and failed inside GetMethod.
            throw new InvalidOperationException($"Unsupported vector element size {Op.Size}.");
    }

    Type[] Types = Op is AOpCodeSimdReg
        ? new Type[] { BaseType, BaseType }
        : new Type[] { BaseType };

    // Resolve before emitting anything: not every Sse2 method has an overload for every lane
    // type, and GetMethod returns null rather than throwing when none matches.
    MethodInfo MthdInfo = typeof(Sse2).GetMethod(Name, Types);

    if (MthdInfo == null)
    {
        throw new InvalidOperationException(
            $"Sse2.{Name} has no overload taking {Types.Length} argument(s) of type {BaseType}.");
    }

    void Ldvec(int Reg)
    {
        Context.EmitLdvec(Reg);

        AVectorHelper.EmitCall(Context, FromSingle);
    }

    Ldvec(Op.Rn);

    if (Op is AOpCodeSimdReg BinOp)
    {
        Ldvec(BinOp.Rm);
    }

    Context.EmitCall(MthdInfo);

    AVectorHelper.EmitCall(Context, ToSingle);

    Context.EmitStvec(Op.Rd);

    if (Op.RegisterSize == ARegisterSize.SIMD64)
    {
        EmitVectorZeroUpper(Context, Op.Rd);
    }
}
/// <summary>
/// Emits a call to the floating-point intrinsic called <paramref name="Name"/> on the current
/// opcode's vector operands and stores the result in Rd.
/// </summary>
/// <remarks>
/// Bit 0 of Op.Size selects the precision: 0 resolves the method on <see cref="Sse"/> with
/// Vector128&lt;float&gt; operands, 1 resolves it on <see cref="Sse2"/> with Vector128&lt;double&gt;
/// operands (converting through AVectorHelper.VectorSingleToDouble / VectorDoubleToSingle).
/// When the opcode is an AOpCodeSimdReg the (Rn, Rm) two-operand overload is used, otherwise the
/// one-operand overload. For a SIMD64 register size the upper half of Rd is zeroed afterwards.
/// </remarks>
/// <param name="Context">Emitter context; its CurrOp must be an AOpCodeSimd.</param>
/// <param name="Name">Name of the Sse/Sse2 method to call.</param>
/// <exception cref="InvalidOperationException">
/// No overload of <paramref name="Name"/> exists for the selected precision and operand count.
/// </exception>
public static void EmitSse2CallF(AILEmitterCtx Context, string Name)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    bool IsDouble = (Op.Size & 1) == 1;

    Type BaseType  = IsDouble ? typeof(Vector128<double>) : typeof(Vector128<float>);
    Type Intrinsic = IsDouble ? typeof(Sse2)              : typeof(Sse);

    Type[] Types = Op is AOpCodeSimdReg
        ? new Type[] { BaseType, BaseType }
        : new Type[] { BaseType };

    // GetMethod returns null when no overload matches; fail here with a clear message instead
    // of handing a null MethodInfo to the emitter.
    MethodInfo MthdInfo = Intrinsic.GetMethod(Name, Types);

    if (MthdInfo == null)
    {
        throw new InvalidOperationException(
            $"{Intrinsic.Name}.{Name} has no overload taking {Types.Length} argument(s) of type {BaseType}.");
    }

    void Ldvec(int Reg)
    {
        Context.EmitLdvec(Reg);

        if (IsDouble)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToDouble));
        }
    }

    Ldvec(Op.Rn);

    if (Op is AOpCodeSimdReg BinOp)
    {
        Ldvec(BinOp.Rm);
    }

    Context.EmitCall(MthdInfo);

    if (IsDouble)
    {
        AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle));
    }

    Context.EmitStvec(Op.Rd);

    if (Op.RegisterSize == ARegisterSize.SIMD64)
    {
        EmitVectorZeroUpper(Context, Op.Rd);
    }
}
public static void EmitUnaryMathCall(AILEmitterCtx Context, string Name)
{
IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;
@ -596,9 +721,9 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.VectorExtractIntSx)
: nameof(ASoftFallback.VectorExtractIntZx));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.VectorExtractIntSx)
: nameof(AVectorHelper.VectorExtractIntZx));
}
public static void EmitVectorExtractF(AILEmitterCtx Context, int Reg, int Index, int Size)
@ -610,11 +735,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractDouble));
}
else
{
@ -646,7 +771,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvec(Reg);
}
@ -659,7 +784,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvectmp();
}
@ -673,7 +798,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvec(Reg);
}
@ -687,11 +812,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
}
else
{
@ -710,11 +835,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
}
else
{

View file

@ -2,6 +2,7 @@ using ChocolArm64.Decoder;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -11,7 +12,14 @@ namespace ChocolArm64.Instruction
{
// Vector bitwise AND: calls Sse2.And when SSE2 is enabled, otherwise emits the IL `and`
// fallback. Emitted once only.
public static void And_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2Call(Context, nameof(Sse2.And));
    }
    else
    {
        EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.And));
    }
}
public static void Bic_V(AILEmitterCtx Context)
@ -95,7 +103,14 @@ namespace ChocolArm64.Instruction
// Vector bitwise exclusive OR: calls Sse2.Xor when SSE2 is enabled, otherwise emits the IL
// `xor` fallback. Emitted once only.
public static void Eor_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2Call(Context, nameof(Sse2.Xor));
    }
    else
    {
        EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Xor));
    }
}
public static void Not_V(AILEmitterCtx Context)
@ -114,7 +129,14 @@ namespace ChocolArm64.Instruction
// Vector bitwise OR: calls Sse2.Or when SSE2 is enabled, otherwise emits the IL `or`
// fallback. Emitted once only.
public static void Orr_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse2)
    {
        EmitSse2Call(Context, nameof(Sse2.Or));
    }
    else
    {
        EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Or));
    }
}
public static void Orr_Vi(AILEmitterCtx Context)

View file

@ -234,21 +234,21 @@ namespace ChocolArm64.Instruction
switch (Op.Size)
{
case 1: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl1_V64),
nameof(ASoftFallback.Tbl1_V128)); break;
case 1: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl1_V64),
nameof(AVectorHelper.Tbl1_V128)); break;
case 2: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl2_V64),
nameof(ASoftFallback.Tbl2_V128)); break;
case 2: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl2_V64),
nameof(AVectorHelper.Tbl2_V128)); break;
case 3: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl3_V64),
nameof(ASoftFallback.Tbl3_V128)); break;
case 3: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl3_V64),
nameof(AVectorHelper.Tbl3_V128)); break;
case 4: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl4_V64),
nameof(ASoftFallback.Tbl4_V128)); break;
case 4: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl4_V64),
nameof(AVectorHelper.Tbl4_V128)); break;
default: throw new InvalidOperationException();
}

View file

@ -1,20 +1,11 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace ChocolArm64.Instruction
{
static class ASoftFallback
{
public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
{
bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
Context.EmitCall(typeof(ASoftFallback), IsSimd64 ? Name64 : Name128);
}
public static void EmitCall(AILEmitterCtx Context, string MthdName)
{
Context.EmitCall(typeof(ASoftFallback), MthdName);
@ -160,78 +151,6 @@ namespace ChocolArm64.Instruction
throw new ArgumentException(nameof(Size));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF32ToS32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF32ToS64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF32ToU32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF32ToU64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF64ToS32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF64ToS64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF64ToU32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF64ToU64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
public static long SMulHi128(long LHS, long RHS)
{
return (long)(BigInteger.Multiply(LHS, RHS) >> 64);
@ -241,239 +160,5 @@ namespace ChocolArm64.Instruction
{
return (ulong)(BigInteger.Multiply(LHS, RHS) >> 64);
}
public static int CountSetBits8(byte Value)
{
return ((Value >> 0) & 1) + ((Value >> 1) & 1) +
((Value >> 2) & 1) + ((Value >> 3) & 1) +
((Value >> 4) & 1) + ((Value >> 5) & 1) +
((Value >> 6) & 1) + (Value >> 7);
}
public static float MaxF(float val1, float val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.SingleToInt32Bits(val1) < 0 && BitConverter.SingleToInt32Bits(val2) < 0)
return -0.0f;
return 0.0f;
}
if (val1 > val2)
return val1;
if (float.IsNaN(val1))
return val1;
return val2;
}
public static double Max(double val1, double val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.DoubleToInt64Bits(val1) < 0 && BitConverter.DoubleToInt64Bits(val2) < 0)
return -0.0;
return 0.0;
}
if (val1 > val2)
return val1;
if (double.IsNaN(val1))
return val1;
return val2;
}
public static float MinF(float val1, float val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.SingleToInt32Bits(val1) < 0 || BitConverter.SingleToInt32Bits(val2) < 0)
return -0.0f;
return 0.0f;
}
if (val1 < val2)
return val1;
if (float.IsNaN(val1))
return val1;
return val2;
}
public static double Min(double val1, double val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.DoubleToInt64Bits(val1) < 0 || BitConverter.DoubleToInt64Bits(val2) < 0)
return -0.0;
return 0.0;
}
if (val1 < val2)
return val1;
if (double.IsNaN(val1))
return val1;
return val2;
}
public static float RoundF(float Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return MathF.Round (Value);
case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value);
case ARoundMode.TowardsZero: return MathF.Truncate(Value);
}
throw new InvalidOperationException();
}
public static double Round(double Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return Math.Round (Value);
case ARoundMode.TowardsPlusInfinity: return Math.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return Math.Floor (Value);
case ARoundMode.TowardsZero: return Math.Truncate(Value);
}
throw new InvalidOperationException();
}
public static AVec Tbl1_V64(AVec Vector, AVec Tb0)
{
return Tbl(Vector, 8, Tb0);
}
public static AVec Tbl1_V128(AVec Vector, AVec Tb0)
{
return Tbl(Vector, 16, Tb0);
}
public static AVec Tbl2_V64(AVec Vector, AVec Tb0, AVec Tb1)
{
return Tbl(Vector, 8, Tb0, Tb1);
}
public static AVec Tbl2_V128(AVec Vector, AVec Tb0, AVec Tb1)
{
return Tbl(Vector, 16, Tb0, Tb1);
}
public static AVec Tbl3_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2);
}
public static AVec Tbl3_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2);
}
public static AVec Tbl4_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3);
}
public static AVec Tbl4_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3);
}
private static AVec Tbl(AVec Vector, int Bytes, params AVec[] Tb)
{
AVec Res = new AVec();
byte[] Table = new byte[Tb.Length * 16];
for (int Index = 0; Index < Tb.Length; Index++)
for (int Index2 = 0; Index2 < 16; Index2++)
{
Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0);
}
for (int Index = 0; Index < Bytes; Index++)
{
byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);
if (TblIdx < Table.Length)
{
Res = VectorInsertInt(Table[TblIdx], Res, Index, 0);
}
}
return Res;
}
public static ulong VectorExtractIntZx(AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return Vector.ExtractByte (Index);
case 1: return Vector.ExtractUInt16(Index);
case 2: return Vector.ExtractUInt32(Index);
case 3: return Vector.ExtractUInt64(Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
public static long VectorExtractIntSx(AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return (sbyte)Vector.ExtractByte (Index);
case 1: return (short)Vector.ExtractUInt16(Index);
case 2: return (int)Vector.ExtractUInt32(Index);
case 3: return (long)Vector.ExtractUInt64(Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
public static float VectorExtractSingle(AVec Vector, int Index)
{
return Vector.ExtractSingle(Index);
}
public static double VectorExtractDouble(AVec Vector, int Index)
{
return Vector.ExtractDouble(Index);
}
public static AVec VectorInsertSingle(float Value, AVec Vector, int Index)
{
return AVec.InsertSingle(Vector, Index, Value);
}
public static AVec VectorInsertDouble(double Value, AVec Vector, int Index)
{
return AVec.InsertDouble(Vector, Index, Value);
}
public static AVec VectorInsertInt(ulong Value, AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return AVec.InsertByte (Vector, Index, (byte)Value);
case 1: return AVec.InsertUInt16(Vector, Index, (ushort)Value);
case 2: return AVec.InsertUInt32(Vector, Index, (uint)Value);
case 3: return AVec.InsertUInt64(Vector, Index, (ulong)Value);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
}
}

View file

@ -0,0 +1,626 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instruction
{
static class AVectorHelper
{
// Emits a call to one of two AVectorHelper methods, chosen by the current opcode's register
// size: Name64 for SIMD64, Name128 for anything else.
public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
{
    string MthdName = Name128;

    if (Context.CurrOp.RegisterSize == ARegisterSize.SIMD64)
    {
        MthdName = Name64;
    }

    Context.EmitCall(typeof(AVectorHelper), MthdName);
}
// Emits a call to the AVectorHelper method with the given name.
public static void EmitCall(AILEmitterCtx Context, string MthdName)
    => Context.EmitCall(typeof(AVectorHelper), MthdName);
// Converts a float to int with saturation: NaN gives 0, values at or beyond the int range clamp
// to int.MaxValue / int.MinValue, everything else truncates toward zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF32ToS32(float Value)
{
    if (float.IsNaN(Value)) return 0;

    // int.MaxValue becomes 2^31 when converted to float, so the comparison must be inclusive;
    // with a strict '>' an input of exactly 2^31 reached the cast, which overflows.
    return Value >= int.MaxValue ? int.MaxValue :
           Value <= int.MinValue ? int.MinValue : (int)Value;
}
// Converts a float to long with saturation: NaN gives 0, values at or beyond the long range
// clamp to long.MaxValue / long.MinValue, everything else truncates toward zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF32ToS64(float Value)
{
    if (float.IsNaN(Value)) return 0;

    // long.MaxValue becomes 2^63 when converted to float, so the comparison must be inclusive;
    // with a strict '>' an input of exactly 2^63 reached the cast, which overflows.
    return Value >= long.MaxValue ? long.MaxValue :
           Value <= long.MinValue ? long.MinValue : (long)Value;
}
// Converts a float to uint with saturation: NaN gives 0, negative values clamp to 0, values at
// or beyond 2^32 clamp to uint.MaxValue, everything else truncates toward zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF32ToU32(float Value)
{
    if (float.IsNaN(Value)) return 0;

    // uint.MaxValue becomes 2^32 when converted to float, so the comparison must be inclusive;
    // with a strict '>' an input of exactly 2^32 reached the cast, which overflows.
    return Value >= uint.MaxValue ? uint.MaxValue :
           Value <= uint.MinValue ? uint.MinValue : (uint)Value;
}
// Converts a float to ulong with saturation: NaN gives 0, negative values clamp to 0, values at
// or beyond 2^64 clamp to ulong.MaxValue, everything else truncates toward zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF32ToU64(float Value)
{
    if (float.IsNaN(Value)) return 0;

    // ulong.MaxValue becomes 2^64 when converted to float, so the comparison must be inclusive;
    // with a strict '>' an input of exactly 2^64 reached the cast, which overflows.
    return Value >= ulong.MaxValue ? ulong.MaxValue :
           Value <= ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
// Converts a double to int with saturation: NaN gives 0, out-of-range values clamp to
// int.MaxValue / int.MinValue, everything else truncates toward zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF64ToS32(double Value)
{
    if (double.IsNaN(Value))
    {
        return 0;
    }

    if (Value > int.MaxValue)
    {
        return int.MaxValue;
    }

    if (Value < int.MinValue)
    {
        return int.MinValue;
    }

    return (int)Value;
}
// Converts a double to long with saturation: NaN gives 0, values at or beyond the long range
// clamp to long.MaxValue / long.MinValue, everything else truncates toward zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF64ToS64(double Value)
{
    if (double.IsNaN(Value)) return 0;

    // long.MaxValue becomes 2^63 when converted to double, so the comparison must be inclusive;
    // with a strict '>' an input of exactly 2^63 reached the cast, which overflows.
    return Value >= long.MaxValue ? long.MaxValue :
           Value <= long.MinValue ? long.MinValue : (long)Value;
}
// Converts a double to uint with saturation: NaN gives 0, negative values clamp to 0, values
// above uint.MaxValue clamp to uint.MaxValue, everything else truncates toward zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF64ToU32(double Value)
{
    if (double.IsNaN(Value))
    {
        return 0;
    }

    if (Value > uint.MaxValue)
    {
        return uint.MaxValue;
    }

    if (Value < uint.MinValue)
    {
        return uint.MinValue;
    }

    return (uint)Value;
}
// Converts a double to ulong with saturation: NaN gives 0, negative values clamp to 0, values
// at or beyond 2^64 clamp to ulong.MaxValue, everything else truncates toward zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF64ToU64(double Value)
{
    if (double.IsNaN(Value)) return 0;

    // ulong.MaxValue becomes 2^64 when converted to double, so the comparison must be inclusive;
    // with a strict '>' an input of exactly 2^64 reached the cast, which overflows.
    return Value >= ulong.MaxValue ? ulong.MaxValue :
           Value <= ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
// Returns the number of 1 bits in the given byte (0..8).
public static int CountSetBits8(byte Value)
{
    int Count = 0;

    // Consume the value one bit at a time from the least significant end.
    for (int Bits = Value; Bits != 0; Bits >>= 1)
    {
        Count += Bits & 1;
    }

    return Count;
}
// Returns the larger of two doubles.
// Two zeros give -0.0 only when both operands are negative zero, otherwise +0.0.
// A NaN in either operand is returned as the result.
public static double Max(double LHS, double RHS)
{
    bool BothZero = LHS == 0.0 && RHS == 0.0;

    if (BothZero)
    {
        bool BothNegative = BitConverter.DoubleToInt64Bits(LHS) < 0 &&
                            BitConverter.DoubleToInt64Bits(RHS) < 0;

        return BothNegative ? -0.0 : 0.0;
    }

    // When LHS is not greater and not NaN, RHS is the answer (including RHS being NaN).
    if (LHS > RHS || double.IsNaN(LHS))
    {
        return LHS;
    }

    return RHS;
}
// Returns the larger of two floats.
// Two zeros give -0.0f only when both operands are negative zero, otherwise +0.0f.
// A NaN in either operand is returned as the result.
public static float MaxF(float LHS, float RHS)
{
    bool BothZero = LHS == 0.0 && RHS == 0.0;

    if (BothZero)
    {
        bool BothNegative = BitConverter.SingleToInt32Bits(LHS) < 0 &&
                            BitConverter.SingleToInt32Bits(RHS) < 0;

        return BothNegative ? -0.0f : 0.0f;
    }

    // When LHS is not greater and not NaN, RHS is the answer (including RHS being NaN).
    if (LHS > RHS || float.IsNaN(LHS))
    {
        return LHS;
    }

    return RHS;
}
// Returns the smaller of two doubles.
// Two zeros give -0.0 when either operand is negative zero, otherwise +0.0.
// A NaN in either operand is returned as the result.
public static double Min(double LHS, double RHS)
{
    bool BothZero = LHS == 0.0 && RHS == 0.0;

    if (BothZero)
    {
        bool AnyNegative = BitConverter.DoubleToInt64Bits(LHS) < 0 ||
                           BitConverter.DoubleToInt64Bits(RHS) < 0;

        return AnyNegative ? -0.0 : 0.0;
    }

    // When LHS is not smaller and not NaN, RHS is the answer (including RHS being NaN).
    if (LHS < RHS || double.IsNaN(LHS))
    {
        return LHS;
    }

    return RHS;
}
/// <summary>
/// Returns the smaller of two floats. When both operands are zero the result is
/// -0.0f if either is negative zero, otherwise +0.0f. A NaN in either operand is
/// returned as the result.
/// </summary>
public static float MinF(float LHS, float RHS)
{
    if (LHS != 0.0 || RHS != 0.0)
    {
        // LHS is chosen when it is strictly smaller or when it is NaN;
        // a NaN in RHS makes the comparison false and is returned as RHS.
        return (LHS < RHS || float.IsNaN(LHS)) ? LHS : RHS;
    }

    // Both operands compare equal to zero: decide the sign from the raw bits.
    bool AnyNegative = BitConverter.SingleToInt32Bits(LHS) < 0 ||
                       BitConverter.SingleToInt32Bits(RHS) < 0;

    return AnyNegative ? -0.0f : 0.0f;
}
/// <summary>
/// Rounds <paramref name="Value"/> to an integral double using the rounding mode
/// stored in bits 22-23 of <paramref name="Fpcr"/>.
/// </summary>
/// <exception cref="InvalidOperationException">The decoded mode matches none of the four handled values.</exception>
public static double Round(double Value, int Fpcr)
{
    ARoundMode Mode = (ARoundMode)((Fpcr >> 22) & 3);

    if (Mode == ARoundMode.ToNearest)
    {
        return Math.Round(Value);
    }

    if (Mode == ARoundMode.TowardsPlusInfinity)
    {
        return Math.Ceiling(Value);
    }

    if (Mode == ARoundMode.TowardsMinusInfinity)
    {
        return Math.Floor(Value);
    }

    if (Mode == ARoundMode.TowardsZero)
    {
        return Math.Truncate(Value);
    }

    throw new InvalidOperationException();
}
/// <summary>
/// Rounds <paramref name="Value"/> to an integral float using the rounding mode
/// stored in bits 22-23 of <paramref name="Fpcr"/>.
/// </summary>
/// <exception cref="InvalidOperationException">The decoded mode matches none of the four handled values.</exception>
public static float RoundF(float Value, int Fpcr)
{
    ARoundMode Mode = (ARoundMode)((Fpcr >> 22) & 3);

    if (Mode == ARoundMode.ToNearest)
    {
        return MathF.Round(Value);
    }

    if (Mode == ARoundMode.TowardsPlusInfinity)
    {
        return MathF.Ceiling(Value);
    }

    if (Mode == ARoundMode.TowardsMinusInfinity)
    {
        return MathF.Floor(Value);
    }

    if (Mode == ARoundMode.TowardsZero)
    {
        return MathF.Truncate(Value);
    }

    throw new InvalidOperationException();
}
/// <summary>Table lookup with one table register; forwards to Tbl with a byte count of 8.</summary>
public static Vector128<float> Tbl1_V64(Vector128<float> Vector, Vector128<float> Tb0)
    => Tbl(Vector, 8, Tb0);
/// <summary>Table lookup with one table register; forwards to Tbl with a byte count of 16.</summary>
public static Vector128<float> Tbl1_V128(Vector128<float> Vector, Vector128<float> Tb0)
    => Tbl(Vector, 16, Tb0);
/// <summary>Table lookup with two table registers; forwards to Tbl with a byte count of 8.</summary>
public static Vector128<float> Tbl2_V64(Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1)
    => Tbl(Vector, 8, Tb0, Tb1);
/// <summary>Table lookup with two table registers; forwards to Tbl with a byte count of 16.</summary>
public static Vector128<float> Tbl2_V128(Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1)
    => Tbl(Vector, 16, Tb0, Tb1);
/// <summary>Table lookup with three table registers; forwards to Tbl with a byte count of 8.</summary>
public static Vector128<float> Tbl3_V64(
    Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1, Vector128<float> Tb2)
    => Tbl(Vector, 8, Tb0, Tb1, Tb2);
/// <summary>Table lookup with three table registers; forwards to Tbl with a byte count of 16.</summary>
public static Vector128<float> Tbl3_V128(
    Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1, Vector128<float> Tb2)
    => Tbl(Vector, 16, Tb0, Tb1, Tb2);
/// <summary>Table lookup with four table registers; forwards to Tbl with a byte count of 8.</summary>
public static Vector128<float> Tbl4_V64(
    Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1, Vector128<float> Tb2, Vector128<float> Tb3)
    => Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3);
/// <summary>Table lookup with four table registers; forwards to Tbl with a byte count of 16.</summary>
public static Vector128<float> Tbl4_V128(
    Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1, Vector128<float> Tb2, Vector128<float> Tb3)
    => Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3);
/// <summary>
/// Byte-wise table lookup. The table registers in <paramref name="Tb"/> are
/// concatenated into one byte table; each of the first <paramref name="Bytes"/>
/// bytes of <paramref name="Vector"/> is used as an index into it. Indices past
/// the end of the table, and result bytes at or beyond <paramref name="Bytes"/>,
/// are left as zero.
/// </summary>
private static Vector128<float> Tbl(Vector128<float> Vector, int Bytes, params Vector128<float>[] Tb)
{
    // Flatten all table registers into a single contiguous byte array.
    byte[] Table = new byte[Tb.Length * 16];

    int Next = 0;

    foreach (Vector128<float> Register in Tb)
    {
        for (byte Lane = 0; Lane < 16; Lane++)
        {
            Table[Next++] = (byte)VectorExtractIntZx(Register, Lane, 0);
        }
    }

    Vector128<float> Res = new Vector128<float>();

    for (byte Index = 0; Index < Bytes; Index++)
    {
        byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);

        if (TblIdx >= Table.Length)
        {
            // Out-of-range index: keep the zero already present in Res.
            continue;
        }

        Res = VectorInsertInt(Table[TblIdx], Res, Index, 0);
    }

    return Res;
}
/// <summary>
/// Reads the 64-bit element at <paramref name="Index"/> through
/// VectorExtractIntSx (Size 3) and reinterprets its bits as a double.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double VectorExtractDouble(Vector128<float> Vector, byte Index)
{
    long Bits = VectorExtractIntSx(Vector, Index, 3);

    return BitConverter.Int64BitsToDouble(Bits);
}
/// <summary>
/// Extracts an integer element from <paramref name="Vector"/> and sign-extends it to 64 bits.
/// </summary>
/// <param name="Vector">Vector to read from.</param>
/// <param name="Index">Element index, counted in elements of the selected size.</param>
/// <param name="Size">Element size selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="Size"/> is not 0-3.</exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long VectorExtractIntSx(Vector128<float> Vector, byte Index, int Size)
{
    if (!Sse41.IsSupported && !Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    if (Sse41.IsSupported)
    {
        // Direct element extracts; the casts reinterpret the result as signed.
        switch (Size)
        {
            case 0: return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
            case 1: return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
            case 2: return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);
            case 3: return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
        }
    }
    else
    {
        // SSE2 only: take the zero-extended value and narrow it to the signed
        // type of the element so the widening back to long sign-extends.
        switch (Size)
        {
            case 0: return (sbyte)VectorExtractIntZx(Vector, Index, Size);
            case 1: return (short)VectorExtractIntZx(Vector, Index, Size);
            case 2: return (int)VectorExtractIntZx(Vector, Index, Size);
            case 3: return (long)VectorExtractIntZx(Vector, Index, Size);
        }
    }

    throw new ArgumentOutOfRangeException(nameof(Size));
}
// Extracts an integer element from Vector and zero-extends it to 64 bits.
// Size selects the element width: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.
// Index is counted in elements of that width.
// Throws ArgumentOutOfRangeException for any other Size, and
// PlatformNotSupportedException when neither SSE4.1 nor SSE2 is available.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong VectorExtractIntZx(Vector128<float> Vector, byte Index, int Size)
{
if (Sse41.IsSupported)
{
// SSE4.1 path: one extract per element width (the 16-bit extract is the SSE2 one).
switch (Size)
{
case 0:
return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
case 1:
return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
case 2:
return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);
case 3:
return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
// SSE2 path: everything is assembled from 16-bit lane extracts.
// ShortIdx is the index of the first 16-bit lane covering the element:
// two bytes share a lane (Index >> 1); wider elements span 2^(Size - 1) lanes.
int ShortIdx = Size == 0
? Index >> 1
: Index << (Size - 1);
// Note: this first extract runs before Size is validated by the switch below.
ushort Value = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
switch (Size)
{
case 0:
// Odd byte indices live in the upper half of the 16-bit lane.
return (byte)(Value >> (Index & 1) * 8);
case 1:
return Value;
case 2:
case 3:
{
ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1));
if (Size == 2)
{
// The uint cast keeps the 32-bit result from being sign-extended when widened to ulong.
return (uint)(Value | (Value1 << 16));
}
ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2));
ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3));
// Combine the four 16-bit lanes, lowest lane in the lowest bits.
return ((ulong)Value << 0) |
((ulong)Value1 << 16) |
((ulong)Value2 << 32) |
((ulong)Value3 << 48);
}
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
}
/// <summary>
/// Extracts the single-precision element at <paramref name="Index"/> from <paramref name="Vector"/>.
/// </summary>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float VectorExtractSingle(Vector128<float> Vector, byte Index)
{
    if (Sse41.IsSupported)
    {
        return Sse41.Extract(Vector, Index);
    }
    else if (Sse2.IsSupported)
    {
        // SSE2-only fallback, mirroring the integer helpers: read the raw 32-bit
        // lane through VectorExtractIntZx (which assembles it from 16-bit
        // extracts) and reinterpret those bits as a float.
        uint Bits = (uint)VectorExtractIntZx(Vector, Index, 2);

        return BitConverter.Int32BitsToSingle(unchecked((int)Bits));
    }

    throw new PlatformNotSupportedException();
}
/// <summary>
/// Returns <paramref name="Vector"/> with the raw bits of <paramref name="Value"/>
/// written to the 64-bit element at <paramref name="Index"/> via VectorInsertInt (Size 3).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertDouble(double Value, Vector128<float> Vector, byte Index)
{
    long Bits = BitConverter.DoubleToInt64Bits(Value);

    return VectorInsertInt((ulong)Bits, Vector, Index, 3);
}
// Returns a copy of Vector with one integer element replaced by the low bits of Value.
// Size selects the element width: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.
// Index is counted in elements of that width.
// Throws ArgumentOutOfRangeException for any other Size, and
// PlatformNotSupportedException when neither SSE4.1 nor SSE2 is available.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertInt(ulong Value, Vector128<float> Vector, byte Index, int Size)
{
if (Sse41.IsSupported)
{
// SSE4.1 path: one insert per element width (the 16-bit insert is the SSE2 one).
switch (Size)
{
case 0:
return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));
case 1:
return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
case 2:
return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));
case 3:
return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
// SSE2 path: everything is written through 16-bit lane inserts.
Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector);
// ShortIdx is the index of the first 16-bit lane covering the element:
// two bytes share a lane (Index >> 1); wider elements span 2^(Size - 1) lanes.
int ShortIdx = Size == 0
? Index >> 1
: Index << (Size - 1);
switch (Size)
{
case 0:
{
// Read-modify-write of the lane holding the byte: keep the other byte
// of the lane, then merge the new byte into the selected half.
ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
int Shift = (Index & 1) * 8;
// Mask is 0xff00 for an even Index (keep high byte) and 0x00ff for an odd one (keep low byte).
ShortVal &= (ushort)(0xff00 >> Shift);
ShortVal |= (ushort)((byte)Value << Shift);
return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
}
case 1:
return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
case 2:
case 3:
{
// Write the value 16 bits at a time, lowest bits into the lowest lane.
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0));
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));
if (Size == 3)
{
// 64-bit elements need two more lanes.
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
}
return Sse.StaticCast<ushort, float>(ShortVector);
}
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
}
/// <summary>
/// Returns <paramref name="Vector"/> with the single-precision element at
/// <paramref name="Index"/> replaced by <paramref name="Value"/>.
/// </summary>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertSingle(float Value, Vector128<float> Vector, byte Index)
{
    if (Sse41.IsSupported)
    {
        // The destination element index is passed in bits 4-5 of the control byte.
        return Sse41.Insert(Vector, Value, (byte)(Index << 4));
    }
    else if (Sse2.IsSupported)
    {
        // SSE2-only fallback, mirroring the integer helpers: write the raw bits of
        // the float as a 32-bit integer element through VectorInsertInt (which
        // uses 16-bit inserts on this path).
        uint Bits = unchecked((uint)BitConverter.SingleToInt32Bits(Value));

        return VectorInsertInt(Bits, Vector, Index, 2);
    }

    throw new PlatformNotSupportedException();
}
/// <summary>Retypes a float vector as an sbyte vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, sbyte>(Vector);
}
/// <summary>Retypes a float vector as a short vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> VectorSingleToInt16(Vector128<float> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, short>(Vector);
}
/// <summary>Retypes a float vector as an int vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> VectorSingleToInt32(Vector128<float> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, int>(Vector);
}
/// <summary>Retypes a float vector as a long vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> VectorSingleToInt64(Vector128<float> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, long>(Vector);
}
/// <summary>Retypes a float vector as a double vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, double>(Vector);
}
/// <summary>Retypes an sbyte vector as a float vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<sbyte, float>(Vector);
}
/// <summary>Retypes a short vector as a float vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt16ToSingle(Vector128<short> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<short, float>(Vector);
}
/// <summary>Retypes an int vector as a float vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt32ToSingle(Vector128<int> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<int, float>(Vector);
}
/// <summary>Retypes a long vector as a float vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt64ToSingle(Vector128<long> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<long, float>(Vector);
}
/// <summary>Retypes a double vector as a float vector via <c>Sse.StaticCast</c>.</summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<double, float>(Vector);
}
}
}

View file

@ -3,6 +3,8 @@ using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Memory
{
@ -189,33 +191,73 @@ namespace ChocolArm64.Memory
return ReadUInt64Unchecked(Position);
}
public AVec ReadVector8(long Position)
public Vector128<float> ReadVector8(long Position)
{
return new AVec() { B0 = ReadByte(Position) };
}
public AVec ReadVector16(long Position)
{
return new AVec() { H0 = ReadUInt16(Position) };
}
public AVec ReadVector32(long Position)
{
return new AVec() { W0 = ReadUInt32(Position) };
}
public AVec ReadVector64(long Position)
{
return new AVec() { X0 = ReadUInt64(Position) };
}
public AVec ReadVector128(long Position)
{
return new AVec()
if (Sse2.IsSupported)
{
X0 = ReadUInt64(Position + 0),
X1 = ReadUInt64(Position + 8)
};
return Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ReadByte(Position)));
}
else
{
throw new PlatformNotSupportedException();
}
}
/// <summary>
/// Reads a 16-bit value with <c>ReadUInt16</c> and returns it in the lowest
/// 16-bit lane of an otherwise zero vector.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
public Vector128<float> ReadVector16(long Position)
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    Vector128<ushort> Lanes = Sse2.Insert(Sse2.SetZeroVector128<ushort>(), ReadUInt16(Position), 0);

    return Sse.StaticCast<ushort, float>(Lanes);
}
/// <summary>
/// Validates read access to the first and last byte of a 4-byte range, then
/// loads one float from <c>RamPtr</c> plus the position (truncated to 32 bits)
/// with a scalar vector load.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
public Vector128<float> ReadVector32(long Position)
{
    EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
    EnsureAccessIsValid(Position + 3, AMemoryPerm.Read);

    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    float* Source = (float*)(RamPtr + (uint)Position);

    return Sse.LoadScalarVector128(Source);
}
/// <summary>
/// Validates read access to the first and last byte of an 8-byte range, then
/// loads one double from <c>RamPtr</c> plus the position (truncated to 32 bits)
/// with a scalar vector load and retypes the result as a float vector.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
public Vector128<float> ReadVector64(long Position)
{
    EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
    EnsureAccessIsValid(Position + 7, AMemoryPerm.Read);

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    double* Source = (double*)(RamPtr + (uint)Position);

    return Sse.StaticCast<double, float>(Sse2.LoadScalarVector128(Source));
}
/// <summary>
/// Validates read access to the first and last byte of a 16-byte range, then
/// loads a full vector from <c>RamPtr</c> plus the position (truncated to 32 bits).
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
public Vector128<float> ReadVector128(long Position)
{
    EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
    EnsureAccessIsValid(Position + 15, AMemoryPerm.Read);

    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    float* Source = (float*)(RamPtr + (uint)Position);

    return Sse.LoadVector128(Source);
}
public sbyte ReadSByteUnchecked(long Position)
@ -258,33 +300,64 @@ namespace ChocolArm64.Memory
return *((ulong*)(RamPtr + (uint)Position));
}
/// <summary>
/// Reads one byte with <c>ReadByteUnchecked</c> and returns it in the lowest
/// byte lane of an otherwise zero vector.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
public Vector128<float> ReadVector8Unchecked(long Position)
{
    if (Sse2.IsSupported)
    {
        // Use the unchecked byte read here: calling the checked ReadByte from the
        // "Unchecked" variant made it behave like ReadVector8.
        return Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ReadByteUnchecked(Position)));
    }
    else
    {
        throw new PlatformNotSupportedException();
    }
}
/// <summary>
/// Reads a 16-bit value with <c>ReadUInt16Unchecked</c> and returns it in the
/// lowest 16-bit lane of an otherwise zero vector.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
public Vector128<float> ReadVector16Unchecked(long Position)
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    Vector128<ushort> Lanes = Sse2.Insert(Sse2.SetZeroVector128<ushort>(), ReadUInt16Unchecked(Position), 0);

    return Sse.StaticCast<ushort, float>(Lanes);
}
/// <summary>
/// Loads one float from <c>RamPtr</c> plus the position (truncated to 32 bits)
/// with a scalar vector load, without calling <c>EnsureAccessIsValid</c>.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
public Vector128<float> ReadVector32Unchecked(long Position)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    float* Source = (float*)(RamPtr + (uint)Position);

    return Sse.LoadScalarVector128(Source);
}
/// <summary>
/// Loads one double from <c>RamPtr</c> plus the position (truncated to 32 bits)
/// with a scalar vector load and retypes the result as a float vector, without
/// calling <c>EnsureAccessIsValid</c>.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
public Vector128<float> ReadVector64Unchecked(long Position)
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    double* Source = (double*)(RamPtr + (uint)Position);

    return Sse.StaticCast<double, float>(Sse2.LoadScalarVector128(Source));
}
/// <summary>
/// Loads a full vector from <c>RamPtr</c> plus the position (truncated to
/// 32 bits), without calling <c>EnsureAccessIsValid</c>.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
public Vector128<float> ReadVector128Unchecked(long Position)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    float* Source = (float*)(RamPtr + (uint)Position);

    return Sse.LoadVector128(Source);
}
public void WriteSByte(long Position, sbyte Value)
@ -338,30 +411,77 @@ namespace ChocolArm64.Memory
WriteUInt64Unchecked(Position, Value);
}
public void WriteVector8(long Position, AVec Value)
public void WriteVector8(long Position, Vector128<float> Value)
{
WriteByte(Position, Value.B0);
if (Sse41.IsSupported)
{
WriteByte(Position, Sse41.Extract(Sse.StaticCast<float, byte>(Value), 0));
}
else if (Sse2.IsSupported)
{
WriteByte(Position, (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector16(long Position, AVec Value)
public void WriteVector16(long Position, Vector128<float> Value)
{
WriteUInt16(Position, Value.H0);
if (Sse2.IsSupported)
{
WriteUInt16(Position, Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector32(long Position, AVec Value)
public void WriteVector32(long Position, Vector128<float> Value)
{
WriteUInt32(Position, Value.W0);
EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
EnsureAccessIsValid(Position + 3, AMemoryPerm.Write);
if (Sse.IsSupported)
{
Sse.StoreScalar((float*)(RamPtr + (uint)Position), Value);
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector64(long Position, AVec Value)
public void WriteVector64(long Position, Vector128<float> Value)
{
WriteUInt64(Position, Value.X0);
EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
EnsureAccessIsValid(Position + 7, AMemoryPerm.Write);
if (Sse2.IsSupported)
{
Sse2.StoreScalar((double*)(RamPtr + (uint)Position), Sse.StaticCast<float, double>(Value));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector128(long Position, AVec Value)
public void WriteVector128(long Position, Vector128<float> Value)
{
WriteUInt64(Position + 0, Value.X0);
WriteUInt64(Position + 8, Value.X1);
EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
EnsureAccessIsValid(Position + 15, AMemoryPerm.Write);
if (Sse.IsSupported)
{
Sse.Store((float*)(RamPtr + (uint)Position), Value);
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteSByteUnchecked(long Position, sbyte Value)
@ -404,30 +524,68 @@ namespace ChocolArm64.Memory
*((ulong*)(RamPtr + (uint)Position)) = Value;
}
public void WriteVector8Unchecked(long Position, AVec Value)
public void WriteVector8Unchecked(long Position, Vector128<float> Value)
{
WriteByteUnchecked(Position, Value.B0);
if (Sse41.IsSupported)
{
WriteByteUnchecked(Position, Sse41.Extract(Sse.StaticCast<float, byte>(Value), 0));
}
else if (Sse2.IsSupported)
{
WriteByteUnchecked(Position, (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector16Unchecked(long Position, AVec Value)
public void WriteVector16Unchecked(long Position, Vector128<float> Value)
{
WriteUInt16Unchecked(Position, Value.H0);
if (Sse2.IsSupported)
{
WriteUInt16Unchecked(Position, Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector32Unchecked(long Position, AVec Value)
public void WriteVector32Unchecked(long Position, Vector128<float> Value)
{
WriteUInt32Unchecked(Position, Value.W0);
if (Sse.IsSupported)
{
Sse.StoreScalar((float*)(RamPtr + (uint)Position), Value);
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector64Unchecked(long Position, AVec Value)
public void WriteVector64Unchecked(long Position, Vector128<float> Value)
{
WriteUInt64Unchecked(Position, Value.X0);
if (Sse2.IsSupported)
{
Sse2.StoreScalar((double*)(RamPtr + (uint)Position), Sse.StaticCast<float, double>(Value));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector128Unchecked(long Position, AVec Value)
public void WriteVector128Unchecked(long Position, Vector128<float> Value)
{
WriteUInt64Unchecked(Position + 0, Value.X0);
WriteUInt64Unchecked(Position + 8, Value.X1);
if (Sse.IsSupported)
{
Sse.Store((float*)(RamPtr + (uint)Position), Value);
}
else
{
throw new PlatformNotSupportedException();
}
}
private void EnsureAccessIsValid(long Position, AMemoryPerm Perm)

View file

@ -2,6 +2,7 @@ using ChocolArm64.Events;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.Intrinsics;
namespace ChocolArm64.State
{
@ -18,10 +19,10 @@ namespace ChocolArm64.State
X16, X17, X18, X19, X20, X21, X22, X23,
X24, X25, X26, X27, X28, X29, X30, X31;
public AVec V0, V1, V2, V3, V4, V5, V6, V7,
V8, V9, V10, V11, V12, V13, V14, V15,
V16, V17, V18, V19, V20, V21, V22, V23,
V24, V25, V26, V27, V28, V29, V30, V31;
public Vector128<float> V0, V1, V2, V3, V4, V5, V6, V7,
V8, V9, V10, V11, V12, V13, V14, V15,
V16, V17, V18, V19, V20, V21, V22, V23,
V24, V25, V26, V27, V28, V29, V30, V31;
public bool Overflow;
public bool Carry;

View file

@ -1,243 +0,0 @@
using System;
using System.Runtime.InteropServices;
namespace ChocolArm64.State
{
// 16-byte vector value with explicit field layout. The fields below overlap:
// the same 16 bytes can be accessed as 16 bytes (B), 8 ushorts (H), 4 uints (W),
// 4 floats (S), 2 ulongs (X) or 2 doubles (D), each at its stated byte offset.
[StructLayout(LayoutKind.Explicit, Size = 16)]
public struct AVec
{
// Byte view: B0..B15 at offsets 0x0..0xf.
[FieldOffset(0x0)] public byte B0;
[FieldOffset(0x1)] public byte B1;
[FieldOffset(0x2)] public byte B2;
[FieldOffset(0x3)] public byte B3;
[FieldOffset(0x4)] public byte B4;
[FieldOffset(0x5)] public byte B5;
[FieldOffset(0x6)] public byte B6;
[FieldOffset(0x7)] public byte B7;
[FieldOffset(0x8)] public byte B8;
[FieldOffset(0x9)] public byte B9;
[FieldOffset(0xa)] public byte B10;
[FieldOffset(0xb)] public byte B11;
[FieldOffset(0xc)] public byte B12;
[FieldOffset(0xd)] public byte B13;
[FieldOffset(0xe)] public byte B14;
[FieldOffset(0xf)] public byte B15;
// 16-bit view: H0..H7, two bytes apart.
[FieldOffset(0x0)] public ushort H0;
[FieldOffset(0x2)] public ushort H1;
[FieldOffset(0x4)] public ushort H2;
[FieldOffset(0x6)] public ushort H3;
[FieldOffset(0x8)] public ushort H4;
[FieldOffset(0xa)] public ushort H5;
[FieldOffset(0xc)] public ushort H6;
[FieldOffset(0xe)] public ushort H7;
// 32-bit integer view: W0..W3, four bytes apart.
[FieldOffset(0x0)] public uint W0;
[FieldOffset(0x4)] public uint W1;
[FieldOffset(0x8)] public uint W2;
[FieldOffset(0xc)] public uint W3;
// Single-precision view: S0..S3, sharing storage with W0..W3.
[FieldOffset(0x0)] public float S0;
[FieldOffset(0x4)] public float S1;
[FieldOffset(0x8)] public float S2;
[FieldOffset(0xc)] public float S3;
// 64-bit integer view: X0 (low half) and X1 (high half).
[FieldOffset(0x0)] public ulong X0;
[FieldOffset(0x8)] public ulong X1;
// Double-precision view: D0 and D1, sharing storage with X0 and X1.
[FieldOffset(0x0)] public double D0;
[FieldOffset(0x8)] public double D1;
// Returns byte lane Index (0-15); throws ArgumentOutOfRangeException otherwise.
public byte ExtractByte(int Index)
{
switch (Index)
{
case 0: return B0;
case 1: return B1;
case 2: return B2;
case 3: return B3;
case 4: return B4;
case 5: return B5;
case 6: return B6;
case 7: return B7;
case 8: return B8;
case 9: return B9;
case 10: return B10;
case 11: return B11;
case 12: return B12;
case 13: return B13;
case 14: return B14;
case 15: return B15;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
// Returns 16-bit lane Index (0-7); throws ArgumentOutOfRangeException otherwise.
public ushort ExtractUInt16(int Index)
{
switch (Index)
{
case 0: return H0;
case 1: return H1;
case 2: return H2;
case 3: return H3;
case 4: return H4;
case 5: return H5;
case 6: return H6;
case 7: return H7;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
// Returns 32-bit integer lane Index (0-3); throws ArgumentOutOfRangeException otherwise.
public uint ExtractUInt32(int Index)
{
switch (Index)
{
case 0: return W0;
case 1: return W1;
case 2: return W2;
case 3: return W3;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
// Returns single-precision lane Index (0-3); throws ArgumentOutOfRangeException otherwise.
public float ExtractSingle(int Index)
{
switch (Index)
{
case 0: return S0;
case 1: return S1;
case 2: return S2;
case 3: return S3;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
// Returns 64-bit integer lane Index (0-1); throws ArgumentOutOfRangeException otherwise.
public ulong ExtractUInt64(int Index)
{
switch (Index)
{
case 0: return X0;
case 1: return X1;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
// Returns double-precision lane Index (0-1); throws ArgumentOutOfRangeException otherwise.
public double ExtractDouble(int Index)
{
switch (Index)
{
case 0: return D0;
case 1: return D1;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
// The Insert* methods below receive Vec by value (AVec is a struct), set one
// lane on that copy and return it; the caller's original value is not modified.
// Each throws ArgumentOutOfRangeException when Index is outside the lane range.

// Sets byte lane Index (0-15) to Value.
public static AVec InsertByte(AVec Vec, int Index, byte Value)
{
switch (Index)
{
case 0: Vec.B0 = Value; break;
case 1: Vec.B1 = Value; break;
case 2: Vec.B2 = Value; break;
case 3: Vec.B3 = Value; break;
case 4: Vec.B4 = Value; break;
case 5: Vec.B5 = Value; break;
case 6: Vec.B6 = Value; break;
case 7: Vec.B7 = Value; break;
case 8: Vec.B8 = Value; break;
case 9: Vec.B9 = Value; break;
case 10: Vec.B10 = Value; break;
case 11: Vec.B11 = Value; break;
case 12: Vec.B12 = Value; break;
case 13: Vec.B13 = Value; break;
case 14: Vec.B14 = Value; break;
case 15: Vec.B15 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
// Sets 16-bit lane Index (0-7) to Value.
public static AVec InsertUInt16(AVec Vec, int Index, ushort Value)
{
switch (Index)
{
case 0: Vec.H0 = Value; break;
case 1: Vec.H1 = Value; break;
case 2: Vec.H2 = Value; break;
case 3: Vec.H3 = Value; break;
case 4: Vec.H4 = Value; break;
case 5: Vec.H5 = Value; break;
case 6: Vec.H6 = Value; break;
case 7: Vec.H7 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
// Sets 32-bit integer lane Index (0-3) to Value.
public static AVec InsertUInt32(AVec Vec, int Index, uint Value)
{
switch (Index)
{
case 0: Vec.W0 = Value; break;
case 1: Vec.W1 = Value; break;
case 2: Vec.W2 = Value; break;
case 3: Vec.W3 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
// Sets single-precision lane Index (0-3) to Value.
public static AVec InsertSingle(AVec Vec, int Index, float Value)
{
switch (Index)
{
case 0: Vec.S0 = Value; break;
case 1: Vec.S1 = Value; break;
case 2: Vec.S2 = Value; break;
case 3: Vec.S3 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
// Sets 64-bit integer lane Index (0-1) to Value.
public static AVec InsertUInt64(AVec Vec, int Index, ulong Value)
{
switch (Index)
{
case 0: Vec.X0 = Value; break;
case 1: Vec.X1 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
// Sets double-precision lane Index (0-1) to Value.
public static AVec InsertDouble(AVec Vec, int Index, double Value)
{
switch (Index)
{
case 0: Vec.D0 = Value; break;
case 1: Vec.D1 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
}
}

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
namespace ChocolArm64.Translation
{
@ -157,7 +158,7 @@ namespace ChocolArm64.Translation
{
case ARegisterType.Flag: return typeof(bool);
case ARegisterType.Int: return typeof(ulong);
case ARegisterType.Vector: return typeof(AVec);
case ARegisterType.Vector: return typeof(Vector128<float>);
}
throw new ArgumentException(nameof(RegType));