Improve CPU initial translation speeds (#50)

* Add background translation to the CPU

* Do not use a separate thread for translation, implement 2 tiers translation

* Remove unnecessary usings

* Lower MinCallCountForReJit

* Remove unused variable
This commit is contained in:
gdkchan 2018-03-04 14:09:59 -03:00 committed by GitHub
parent 6dd9cdf337
commit 6d60fcfc24
9 changed files with 318 additions and 104 deletions

View file

@ -2,6 +2,7 @@ using ChocolArm64.Memory;
using ChocolArm64.State; using ChocolArm64.State;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Reflection; using System.Reflection;
using System.Reflection.Emit; using System.Reflection.Emit;
@ -13,35 +14,47 @@ namespace ChocolArm64
private AA64Subroutine ExecDelegate; private AA64Subroutine ExecDelegate;
private bool HasDelegate;
public static Type[] FixedArgTypes { get; private set; }
public static int StateArgIdx { get; private set; } public static int StateArgIdx { get; private set; }
public static int MemoryArgIdx { get; private set; } public static int MemoryArgIdx { get; private set; }
public static Type[] FixedArgTypes { get; private set; }
public DynamicMethod Method { get; private set; } public DynamicMethod Method { get; private set; }
public HashSet<long> SubCalls { get; private set; } public ReadOnlyCollection<ARegister> Params { get; private set; }
public List<ARegister> Params { get; private set; } private HashSet<long> Callees;
public bool NeedsReJit { get; private set; } private ATranslatedSubType Type;
public ATranslatedSub() private int CallCount;
private bool NeedsReJit;
private int MinCallCountForReJit = 250;
public ATranslatedSub(DynamicMethod Method, List<ARegister> Params, HashSet<long> Callees)
{ {
SubCalls = new HashSet<long>(); if (Method == null)
} {
throw new ArgumentNullException(nameof(Method));
}
public ATranslatedSub(DynamicMethod Method, List<ARegister> Params) : this()
{
if (Params == null) if (Params == null)
{ {
throw new ArgumentNullException(nameof(Params)); throw new ArgumentNullException(nameof(Params));
} }
this.Method = Method; if (Callees == null)
this.Params = Params; {
throw new ArgumentNullException(nameof(Callees));
}
this.Method = Method;
this.Params = Params.AsReadOnly();
this.Callees = Callees;
PrepareDelegate();
} }
static ATranslatedSub() static ATranslatedSub()
@ -69,36 +82,53 @@ namespace ChocolArm64
} }
} }
public long Execute(AThreadState ThreadState, AMemory Memory) private void PrepareDelegate()
{ {
if (!HasDelegate) string Name = $"{Method.Name}_Dispatch";
DynamicMethod Mthd = new DynamicMethod(Name, typeof(long), FixedArgTypes);
ILGenerator Generator = Mthd.GetILGenerator();
Generator.EmitLdargSeq(FixedArgTypes.Length);
foreach (ARegister Reg in Params)
{ {
string Name = $"{Method.Name}_Dispatch"; Generator.EmitLdarg(StateArgIdx);
DynamicMethod Mthd = new DynamicMethod(Name, typeof(long), FixedArgTypes); Generator.Emit(OpCodes.Ldfld, Reg.GetField());
ILGenerator Generator = Mthd.GetILGenerator();
Generator.EmitLdargSeq(FixedArgTypes.Length);
foreach (ARegister Reg in Params)
{
Generator.EmitLdarg(StateArgIdx);
Generator.Emit(OpCodes.Ldfld, Reg.GetField());
}
Generator.Emit(OpCodes.Call, Method);
Generator.Emit(OpCodes.Ret);
ExecDelegate = (AA64Subroutine)Mthd.CreateDelegate(typeof(AA64Subroutine));
HasDelegate = true;
} }
Generator.Emit(OpCodes.Call, Method);
Generator.Emit(OpCodes.Ret);
ExecDelegate = (AA64Subroutine)Mthd.CreateDelegate(typeof(AA64Subroutine));
}
//Reports whether this subroutine should be translated again.
//Tier 0 code counts its own calls (the counter stops growing once it
//reaches MinCallCountForReJit) and asks for a re-jit when the counter
//equals that threshold. Tier 1 code asks for a re-jit only after
//MarkForReJit was called. Any other type never asks for one.
public bool ShouldReJit()
{
    switch (Type)
    {
        case ATranslatedSubType.SubTier0:
        {
            //Saturating counter: it is never incremented past the threshold.
            if (CallCount < MinCallCountForReJit)
            {
                CallCount++;
            }

            return CallCount == MinCallCountForReJit;
        }

        case ATranslatedSubType.SubTier1:
            return NeedsReJit;

        default:
            return false;
    }
}
public long Execute(AThreadState ThreadState, AMemory Memory)
{
return ExecDelegate(ThreadState, Memory); return ExecDelegate(ThreadState, Memory);
} }
public void MarkForReJit() => NeedsReJit = true; public void SetType(ATranslatedSubType Type) => this.Type = Type;
public bool HasCallee(long Position) => Callees.Contains(Position);
public void MarkForReJit() => NeedsReJit = true;
} }
} }

9
ATranslatedSubType.cs Normal file
View file

@ -0,0 +1,9 @@
namespace ChocolArm64
{
    //Kind of translation a translated subroutine holds.
    //The numeric values match the ones the compiler assigns implicitly.
    enum ATranslatedSubType
    {
        //Single block translated at a position that a previous
        //tier 0 translation recorded as its continuation.
        SubBlock = 0,

        //Single block translated by the tier 0 path at any other position.
        SubTier0 = 1,

        //Whole subroutine translated by the tier 1 path.
        SubTier1 = 2
    }
}

View file

@ -7,11 +7,14 @@ using System;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Collections.Generic; using System.Collections.Generic;
using System.Reflection.Emit; using System.Reflection.Emit;
using System.Threading;
namespace ChocolArm64 namespace ChocolArm64
{ {
public class ATranslator public class ATranslator
{ {
private HashSet<long> SubBlocks;
private ConcurrentDictionary<long, ATranslatedSub> CachedSubs; private ConcurrentDictionary<long, ATranslatedSub> CachedSubs;
private ConcurrentDictionary<long, string> SymbolTable; private ConcurrentDictionary<long, string> SymbolTable;
@ -24,6 +27,8 @@ namespace ChocolArm64
public ATranslator(IReadOnlyDictionary<long, string> SymbolTable = null) public ATranslator(IReadOnlyDictionary<long, string> SymbolTable = null)
{ {
SubBlocks = new HashSet<long>();
CachedSubs = new ConcurrentDictionary<long, ATranslatedSub>(); CachedSubs = new ConcurrentDictionary<long, ATranslatedSub>();
if (SymbolTable != null) if (SymbolTable != null)
@ -38,9 +43,9 @@ namespace ChocolArm64
KeepRunning = true; KeepRunning = true;
} }
public void StopExecution() => KeepRunning = false; internal void StopExecution() => KeepRunning = false;
public void ExecuteSubroutine(AThread Thread, long Position) internal void ExecuteSubroutine(AThread Thread, long Position)
{ {
do do
{ {
@ -54,9 +59,14 @@ namespace ChocolArm64
CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position, SubName)); CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position, SubName));
} }
if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub) || Sub.NeedsReJit) if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub))
{ {
Sub = TranslateSubroutine(Thread.Memory, Position); Sub = TranslateTier0(Thread.Memory, Position);
}
if (Sub.ShouldReJit())
{
TranslateTier1(Thread.Memory, Position);
} }
Position = Sub.Execute(Thread.ThreadState, Thread.Memory); Position = Sub.Execute(Thread.ThreadState, Thread.Memory);
@ -86,19 +96,57 @@ namespace ChocolArm64
return CachedSubs.ContainsKey(Position); return CachedSubs.ContainsKey(Position);
} }
//TranslateTier0: translates only the single basic block that starts at
//Position, stores the result in CachedSubs and returns it.
//The result is typed SubBlock when Position was previously recorded in
//SubBlocks (and the record is consumed), otherwise it is typed SubTier0.
//NOTE(review): SubBlocks is a plain HashSet while CachedSubs is a
//ConcurrentDictionary - confirm this method is never run by two threads at once.
private ATranslatedSub TranslateSubroutine(AMemory Memory, long Position) private ATranslatedSub TranslateTier0(AMemory Memory, long Position)
{
ABlock Block = ADecoder.DecodeBasicBlock(this, Memory, Position);
//The emitter works on a graph, so wrap the single block in a one element graph
//with the block itself as the root.
ABlock[] Graph = new ABlock[] { Block };
string SubName = GetSubName(Position);
AILEmitterCtx Context = new AILEmitterCtx(this, Graph, Block, SubName);
//Emit the current opcode first, then keep advancing until AdvanceOpCode
//reports that there is nothing left.
do
{
Context.EmitOpCode();
}
while (Context.AdvanceOpCode());
ATranslatedSub Subroutine = Context.GetSubroutine();
//A position recorded by an earlier call (see the end of this method) is
//tagged as SubBlock; ShouldReJit only returns true for the two tier types.
if (SubBlocks.Contains(Position))
{
SubBlocks.Remove(Position);
Subroutine.SetType(ATranslatedSubType.SubBlock);
}
else
{
Subroutine.SetType(ATranslatedSubType.SubTier0);
}
//Insert the new translation, replacing any entry already cached for Position.
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
AOpCode LastOp = Block.GetLastOp();
//Unless the block ends with Ret or Br, remember the position right after its
//last opcode (presumably the next 4-byte instruction - confirm) so that a
//later translation starting there is tagged as SubBlock.
if (LastOp.Emitter != AInstEmit.Ret &&
LastOp.Emitter != AInstEmit.Br)
{
SubBlocks.Add(LastOp.Position + 4);
}
return Subroutine;
}
private void TranslateTier1(AMemory Memory, long Position)
{ {
(ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(this, Memory, Position); (ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(this, Memory, Position);
string SubName = SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}"); string SubName = GetSubName(Position);
PropagateName(Cfg.Graph, SubName); PropagateName(Cfg.Graph, SubName);
AILEmitterCtx Context = new AILEmitterCtx( AILEmitterCtx Context = new AILEmitterCtx(this, Cfg.Graph, Cfg.Root, SubName);
this,
Cfg.Graph,
Cfg.Root,
SubName);
if (Context.CurrBlock.Position != Position) if (Context.CurrBlock.Position != Position)
{ {
@ -115,7 +163,7 @@ namespace ChocolArm64
//since we can now call it directly which is faster. //since we can now call it directly which is faster.
foreach (ATranslatedSub TS in CachedSubs.Values) foreach (ATranslatedSub TS in CachedSubs.Values)
{ {
if (TS.SubCalls.Contains(Position)) if (TS.HasCallee(Position))
{ {
TS.MarkForReJit(); TS.MarkForReJit();
} }
@ -123,9 +171,14 @@ namespace ChocolArm64
ATranslatedSub Subroutine = Context.GetSubroutine(); ATranslatedSub Subroutine = Context.GetSubroutine();
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine); Subroutine.SetType(ATranslatedSubType.SubTier1);
return Subroutine; CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
}
private string GetSubName(long Position)
{
return SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}");
} }
private void PropagateName(ABlock[] Graph, string Name) private void PropagateName(ABlock[] Graph, string Name)

View file

@ -18,6 +18,18 @@ namespace ChocolArm64.Decoder
OpActivators = new ConcurrentDictionary<Type, OpActivator>(); OpActivators = new ConcurrentDictionary<Type, OpActivator>();
} }
//Creates a block starting at Start, hands it to FillBlock together with
//Memory, and returns it. The Translator argument is not used here.
public static ABlock DecodeBasicBlock(ATranslator Translator, AMemory Memory, long Start)
{
    ABlock Decoded = new ABlock(Start);

    FillBlock(Memory, Decoded);

    return Decoded;
}
public static (ABlock[] Graph, ABlock Root) DecodeSubroutine( public static (ABlock[] Graph, ABlock Root) DecodeSubroutine(
ATranslator Translator, ATranslator Translator,
AMemory Memory, AMemory Memory,
@ -72,8 +84,8 @@ namespace ChocolArm64.Decoder
} }
} }
if ((!(LastOp is AOpCodeBImmAl) && if (!((LastOp is AOpCodeBImmAl) ||
!(LastOp is AOpCodeBReg)) || HasCachedSub) (LastOp is AOpCodeBReg)) || HasCachedSub)
{ {
Current.Next = Enqueue(Current.EndPosition); Current.Next = Enqueue(Current.EndPosition);
} }

View file

@ -2,6 +2,7 @@ using ChocolArm64.Decoder;
using ChocolArm64.State; using ChocolArm64.State;
using ChocolArm64.Translation; using ChocolArm64.Translation;
using System.Reflection; using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64.Instruction namespace ChocolArm64.Instruction
{ {
@ -37,6 +38,12 @@ namespace ChocolArm64.Instruction
{ {
Context.EmitLoadState(Context.CurrBlock.Next); Context.EmitLoadState(Context.CurrBlock.Next);
} }
else
{
Context.EmitLdc_I8(Op.Position + 4);
Context.Emit(OpCodes.Ret);
}
} }
public static void Und(AILEmitterCtx Context) public static void Und(AILEmitterCtx Context)
@ -60,6 +67,12 @@ namespace ChocolArm64.Instruction
{ {
Context.EmitLoadState(Context.CurrBlock.Next); Context.EmitLoadState(Context.CurrBlock.Next);
} }
else
{
Context.EmitLdc_I8(Op.Position + 4);
Context.Emit(OpCodes.Ret);
}
} }
} }
} }

View file

@ -11,14 +11,24 @@ namespace ChocolArm64.Instruction
{ {
AOpCodeBImmAl Op = (AOpCodeBImmAl)Context.CurrOp; AOpCodeBImmAl Op = (AOpCodeBImmAl)Context.CurrOp;
Context.Emit(OpCodes.Br, Context.GetLabel(Op.Imm)); if (Context.CurrBlock.Branch != null)
{
Context.Emit(OpCodes.Br, Context.GetLabel(Op.Imm));
}
else
{
Context.EmitStoreState();
Context.EmitLdc_I8(Op.Imm);
Context.Emit(OpCodes.Ret);
}
} }
public static void B_Cond(AILEmitterCtx Context) public static void B_Cond(AILEmitterCtx Context)
{ {
AOpCodeBImmCond Op = (AOpCodeBImmCond)Context.CurrOp; AOpCodeBImmCond Op = (AOpCodeBImmCond)Context.CurrOp;
Context.EmitCondBranch(Context.GetLabel(Op.Imm), Op.Cond); EmitBranch(Context, Op.Cond);
} }
public static void Bl(AILEmitterCtx Context) public static void Bl(AILEmitterCtx Context)
@ -48,10 +58,7 @@ namespace ChocolArm64.Instruction
Context.Emit(OpCodes.Pop); Context.Emit(OpCodes.Pop);
if (Context.CurrBlock.Next != null) Context.EmitLoadState(Context.CurrBlock.Next);
{
Context.EmitLoadState(Context.CurrBlock.Next);
}
} }
else else
{ {
@ -93,7 +100,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdintzr(Op.Rt); Context.EmitLdintzr(Op.Rt);
Context.EmitLdc_I(0); Context.EmitLdc_I(0);
Context.Emit(ILOp, Context.GetLabel(Op.Imm)); EmitBranch(Context, ILOp);
} }
public static void Ret(AILEmitterCtx Context) public static void Ret(AILEmitterCtx Context)
@ -118,7 +125,65 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I(0); Context.EmitLdc_I(0);
Context.Emit(ILOp, Context.GetLabel(Op.Imm)); EmitBranch(Context, ILOp);
}
//Emits the branch of the current AOpCodeBImm opcode for the condition Cond.
//When the current block has both a Next and a Branch block, this is a
//conditional jump to the label of the branch target. Otherwise the emitted
//code returns a position instead: Op.Imm when the condition holds,
//Op.Position + 4 when it does not.
private static void EmitBranch(AILEmitterCtx Context, ACond Cond)
{
    AOpCodeBImm BranchOp = (AOpCodeBImm)Context.CurrOp;

    if (Context.CurrBlock.Next == null ||
        Context.CurrBlock.Branch == null)
    {
        Context.EmitStoreState();

        AILLabel Taken = new AILLabel();

        Context.EmitCondBranch(Taken, Cond);

        //Condition not met: return the position after this opcode.
        Context.EmitLdc_I8(BranchOp.Position + 4);
        Context.Emit(OpCodes.Ret);

        //Condition met: return the branch target.
        Context.MarkLabel(Taken);

        Context.EmitLdc_I8(BranchOp.Imm);
        Context.Emit(OpCodes.Ret);

        return;
    }

    Context.EmitCondBranch(Context.GetLabel(BranchOp.Imm), Cond);
}
//Emits the branch of the current AOpCodeBImm opcode using the IL branch
//instruction ILOp (its operands are expected to be on the stack already).
//When the current block has both a Next and a Branch block, ILOp jumps to
//the label of the branch target. Otherwise the emitted code returns a
//position instead: Op.Imm when ILOp branches, Op.Position + 4 when it falls through.
private static void EmitBranch(AILEmitterCtx Context, OpCode ILOp)
{
    AOpCodeBImm BranchOp = (AOpCodeBImm)Context.CurrOp;

    if (Context.CurrBlock.Next == null ||
        Context.CurrBlock.Branch == null)
    {
        Context.EmitStoreState();

        AILLabel Taken = new AILLabel();

        Context.Emit(ILOp, Taken);

        //Fall through: return the position after this opcode.
        Context.EmitLdc_I8(BranchOp.Position + 4);
        Context.Emit(OpCodes.Ret);

        //Branch taken: return the branch target.
        Context.MarkLabel(Taken);

        Context.EmitLdc_I8(BranchOp.Imm);
        Context.Emit(OpCodes.Ret);

        return;
    }

    Context.Emit(ILOp, Context.GetLabel(BranchOp.Imm));
}
} }
} }

View file

@ -58,11 +58,13 @@ namespace ChocolArm64.Translation
this.Root = ILBlocks[Array.IndexOf(Graph, Root)]; this.Root = ILBlocks[Array.IndexOf(Graph, Root)];
} }
public ATranslatedSub GetSubroutine() public AILBlock GetILBlock(int Index) => ILBlocks[Index];
public ATranslatedSub GetSubroutine(HashSet<long> Callees)
{ {
LocalAlloc = new ALocalAlloc(ILBlocks, Root); LocalAlloc = new ALocalAlloc(ILBlocks, Root);
InitSubroutine(); InitSubroutine(Callees);
InitLocals(); InitLocals();
foreach (AILBlock ILBlock in ILBlocks) foreach (AILBlock ILBlock in ILBlocks)
@ -73,24 +75,7 @@ namespace ChocolArm64.Translation
return Subroutine; return Subroutine;
} }
public AILBlock GetILBlock(int Index) => ILBlocks[Index]; private void InitSubroutine(HashSet<long> Callees)
//Resets the register to local map, then emits code that copies every
//subroutine parameter from its method argument into the local assigned
//to that register. Register arguments come right after the fixed arguments.
private void InitLocals()
{
    Locals = new Dictionary<ARegister, int>();

    //First argument slot that holds a register value.
    int ArgIndex = ATranslatedSub.FixedArgTypes.Length;

    foreach (ARegister Param in Subroutine.Params)
    {
        Generator.EmitLdarg(ArgIndex++);
        Generator.EmitStloc(GetLocalIndex(Param));
    }
}
private void InitSubroutine()
{ {
List<ARegister> Params = new List<ARegister>(); List<ARegister> Params = new List<ARegister>();
@ -114,9 +99,24 @@ namespace ChocolArm64.Translation
Generator = Mthd.GetILGenerator(); Generator = Mthd.GetILGenerator();
Subroutine = new ATranslatedSub(Mthd, Params); Subroutine = new ATranslatedSub(Mthd, Params, Callees);
} }
//Resets the register to local map, then emits code that copies every
//subroutine parameter from its method argument into the local assigned
//to that register. Register arguments come right after the fixed arguments.
private void InitLocals()
{
    Locals = new Dictionary<ARegister, int>();

    //First argument slot that holds a register value.
    int ArgIndex = ATranslatedSub.FixedArgTypes.Length;

    foreach (ARegister Param in Subroutine.Params)
    {
        Generator.EmitLdarg(ArgIndex++);
        Generator.EmitStloc(GetLocalIndex(Param));
    }
}
private Type[] GetParamTypes(IList<ARegister> Params) private Type[] GetParamTypes(IList<ARegister> Params)
{ {
Type[] FixedArgs = ATranslatedSub.FixedArgTypes; Type[] FixedArgs = ATranslatedSub.FixedArgTypes;

View file

@ -12,14 +12,9 @@ namespace ChocolArm64.Translation
{ {
private ATranslator Translator; private ATranslator Translator;
private Dictionary<long, AILLabel> Labels; private HashSet<long> Callees;
private AILEmitter Emitter; private Dictionary<long, AILLabel> Labels;
private AILBlock ILBlock;
private AOpCode OptOpLastCompare;
private AOpCode OptOpLastFlagSet;
private int BlkIndex; private int BlkIndex;
private int OpcIndex; private int OpcIndex;
@ -29,6 +24,13 @@ namespace ChocolArm64.Translation
public ABlock CurrBlock => Graph[BlkIndex]; public ABlock CurrBlock => Graph[BlkIndex];
public AOpCode CurrOp => Graph[BlkIndex].OpCodes[OpcIndex]; public AOpCode CurrOp => Graph[BlkIndex].OpCodes[OpcIndex];
private AILEmitter Emitter;
private AILBlock ILBlock;
private AOpCode OptOpLastCompare;
private AOpCode OptOpLastFlagSet;
//This is the index of the temporary register, used to store temporary //This is the index of the temporary register, used to store temporary
//values needed by some functions, since IL doesn't have a swap instruction. //values needed by some functions, since IL doesn't have a swap instruction.
//You can use any value here as long it doesn't conflict with the indices //You can use any value here as long it doesn't conflict with the indices
@ -45,10 +47,27 @@ namespace ChocolArm64.Translation
ABlock Root, ABlock Root,
string SubName) string SubName)
{ {
if (Translator == null)
{
throw new ArgumentNullException(nameof(Translator));
}
if (Graph == null)
{
throw new ArgumentNullException(nameof(Graph));
}
if (Root == null)
{
throw new ArgumentNullException(nameof(Root));
}
this.Translator = Translator; this.Translator = Translator;
this.Graph = Graph; this.Graph = Graph;
this.Root = Root; this.Root = Root;
Callees = new HashSet<long>();
Labels = new Dictionary<long, AILLabel>(); Labels = new Dictionary<long, AILLabel>();
Emitter = new AILEmitter(Graph, Root, SubName); Emitter = new AILEmitter(Graph, Root, SubName);
@ -57,23 +76,27 @@ namespace ChocolArm64.Translation
OpcIndex = -1; OpcIndex = -1;
if (!AdvanceOpCode()) if (Graph.Length == 0 || !AdvanceOpCode())
{ {
throw new ArgumentException(nameof(Graph)); throw new ArgumentException(nameof(Graph));
} }
} }
public ATranslatedSub GetSubroutine() => Emitter.GetSubroutine(); public ATranslatedSub GetSubroutine()
{
return Emitter.GetSubroutine(Callees);
}
public bool AdvanceOpCode() public bool AdvanceOpCode()
{ {
if (OpcIndex + 1 == CurrBlock.OpCodes.Count &&
BlkIndex + 1 == Graph.Length)
{
return false;
}
while (++OpcIndex >= (CurrBlock?.OpCodes.Count ?? 0)) while (++OpcIndex >= (CurrBlock?.OpCodes.Count ?? 0))
{ {
if (BlkIndex + 1 >= Graph.Length)
{
return false;
}
BlkIndex++; BlkIndex++;
OpcIndex = -1; OpcIndex = -1;
@ -100,6 +123,13 @@ namespace ChocolArm64.Translation
public bool TryOptEmitSubroutineCall() public bool TryOptEmitSubroutineCall()
{ {
Callees.Add(((AOpCodeBImm)CurrOp).Imm);
if (CurrBlock.Next == null)
{
return false;
}
if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub)) if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub))
{ {
return false; return false;

View file

@ -67,14 +67,15 @@ namespace ChocolArm64.Translation
public long VecOutputs; public long VecOutputs;
} }
private const int MaxOptGraphLength = 55; private const int MaxOptGraphLength = 40;
public ALocalAlloc(AILBlock[] Graph, AILBlock Root) public ALocalAlloc(AILBlock[] Graph, AILBlock Root)
{ {
IntPaths = new Dictionary<AILBlock, PathIo>(); IntPaths = new Dictionary<AILBlock, PathIo>();
VecPaths = new Dictionary<AILBlock, PathIo>(); VecPaths = new Dictionary<AILBlock, PathIo>();
if (Graph.Length < MaxOptGraphLength) if (Graph.Length > 1 &&
Graph.Length < MaxOptGraphLength)
{ {
InitializeOptimal(Graph, Root); InitializeOptimal(Graph, Root);
} }
@ -179,10 +180,8 @@ namespace ChocolArm64.Translation
{ {
//This is WAY faster than InitializeOptimal, but results in //This is WAY faster than InitializeOptimal, but results in
//unneeded loads and stores, so the resulting code will be slower.
long IntInputs = 0; long IntInputs = 0, IntOutputs = 0;
long IntOutputs = 0; long VecInputs = 0, VecOutputs = 0;
long VecInputs = 0;
long VecOutputs = 0;
foreach (AILBlock Block in Graph) foreach (AILBlock Block in Graph)
{ {
@ -196,8 +195,11 @@ namespace ChocolArm64.Translation
//in those cases if we attempt to write an output registers that was //in those cases if we attempt to write an output registers that was
//not written, we will be just writing zero and messing up the old register value. //not written, we will be just writing zero and messing up the old register value.
//So we just need to ensure that all outputs are loaded. //So we just need to ensure that all outputs are loaded.
IntInputs |= IntOutputs; if (Graph.Length > 1)
VecInputs |= VecOutputs; {
IntInputs |= IntOutputs;
VecInputs |= VecOutputs;
}
foreach (AILBlock Block in Graph) foreach (AILBlock Block in Graph)
{ {