1
0
Fork 0
forked from suyu/suyu

shader_jit_x64: Use Reg32 for LOOP* registers, eliminating casts

This commit is contained in:
Yuri Kunde Schlesner 2016-12-12 21:44:15 -08:00
parent f4e98ecf3f
commit 5ff3206207

View file

@ -109,11 +109,11 @@ static const Reg64 SETUP = r9;
static const Reg64 ADDROFFS_REG_0 = r10; static const Reg64 ADDROFFS_REG_0 = r10;
static const Reg64 ADDROFFS_REG_1 = r11; static const Reg64 ADDROFFS_REG_1 = r11;
/// VS loop count register (Multiplied by 16) /// VS loop count register (Multiplied by 16)
static const Reg64 LOOPCOUNT_REG = r12; static const Reg32 LOOPCOUNT_REG = r12d;
/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker) /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
static const Reg64 LOOPCOUNT = rsi; static const Reg32 LOOPCOUNT = esi;
/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
static const Reg64 LOOPINC = rdi; static const Reg32 LOOPINC = edi;
/// Result of the previous CMP instruction for the X-component comparison /// Result of the previous CMP instruction for the X-component comparison
static const Reg64 COND0 = r13; static const Reg64 COND0 = r13;
/// Result of the previous CMP instruction for the Y-component comparison /// Result of the previous CMP instruction for the Y-component comparison
@ -734,23 +734,23 @@ void JitShader::Compile_LOOP(Instruction instr) {
// 4 bits) to be used as an offset into the 16-byte vector registers later // 4 bits) to be used as an offset into the 16-byte vector registers later
size_t offset = size_t offset =
ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
mov(LOOPCOUNT.cvt32(), dword[SETUP + offset]); mov(LOOPCOUNT, dword[SETUP + offset]);
mov(LOOPCOUNT_REG.cvt32(), LOOPCOUNT.cvt32()); mov(LOOPCOUNT_REG, LOOPCOUNT);
shr(LOOPCOUNT_REG.cvt32(), 4); shr(LOOPCOUNT_REG, 4);
and(LOOPCOUNT_REG.cvt32(), 0xFF0); // Y-component is the start and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
mov(LOOPINC.cvt32(), LOOPCOUNT.cvt32()); mov(LOOPINC, LOOPCOUNT);
shr(LOOPINC.cvt32(), 12); shr(LOOPINC, 12);
and(LOOPINC.cvt32(), 0xFF0); // Z-component is the incrementer and(LOOPINC, 0xFF0); // Z-component is the incrementer
movzx(LOOPCOUNT.cvt32(), LOOPCOUNT.cvt8()); // X-component is iteration count movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
add(LOOPCOUNT.cvt32(), 1); // Iteration count is X-component + 1 add(LOOPCOUNT, 1); // Iteration count is X-component + 1
Label l_loop_start; Label l_loop_start;
L(l_loop_start); L(l_loop_start);
Compile_Block(instr.flow_control.dest_offset + 1); Compile_Block(instr.flow_control.dest_offset + 1);
add(LOOPCOUNT_REG.cvt32(), LOOPINC.cvt32()); // Increment LOOPCOUNT_REG by Z-component add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
sub(LOOPCOUNT.cvt32(), 1); // Decrement loop count by 1 sub(LOOPCOUNT, 1); // Decrement loop count by 1
jnz(l_loop_start); // Loop if not equal jnz(l_loop_start); // Loop if not equal
looping = false; looping = false;
@ -856,7 +856,7 @@ void JitShader::Compile() {
// Zero address/loop registers // Zero address/loop registers
xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
xor(LOOPCOUNT_REG.cvt32(), LOOPCOUNT_REG.cvt32()); xor(LOOPCOUNT_REG, LOOPCOUNT_REG);
// Used to set a register to one // Used to set a register to one
static const __m128 one = {1.f, 1.f, 1.f, 1.f}; static const __m128 one = {1.f, 1.f, 1.f, 1.f};