void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i) { const Address& src = m_sel.tlu ? ptr[r12 + rax * 4 + offsetof(GSScanlineGlobalData, clut)] : ptr[rbx + rax * 4]; if(i == 0) vmovd(eax, addr); else vpextrd(eax, addr, i); if(m_sel.tlu) movzx(rax, byte[rbx + rax]); if(i == 0) vmovd(dst, src); else vpinsrd(dst, src, i); }
void GPUDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr) { for(int i = 0; i < 8; i++) { pextrw(eax, addr, (uint8)i); if(m_sel.tlu) movzx(eax, byte[esi + eax]); const Address& src = m_sel.tlu ? ptr[edx + eax * 2] : ptr[esi + eax * 2]; if(i == 0) movd(dst, src); else pinsrw(dst, src, (uint8)i); } }
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i) { if(!m_cpu.has(util::Cpu::tSSE41) && i > 0) { ASSERT(0); } if(i == 0) movd(eax, addr); else pextrd(eax, addr, i); if(m_sel.tlu) movzx(eax, byte[ebx + eax]); const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4]; if(i == 0) movd(dst, src); else pinsrd(dst, src, i); }
/**
 * Generates, on first call, a machine-code stub that transfers control to
 * the register context referenced by a stack iterator, and returns the
 * stub's entry point. Later calls return the cached pointer.
 *
 * The stub takes one argument: the pointer to the stack iterator (read from
 * RDI under PLATFORM_POSIX, from RCX otherwise). It then:
 *   1. reloads the general-purpose registers through the pointers stored in
 *      the context;
 *   2. if the saved flags word is non-zero, merges it into the processor
 *      flags using FLG_CLEAR_MASK / FLG_SET_MASK;
 *   3. switches RSP to the saved value and jumps to the saved RIP.
 *
 * @return entry point of the generated stub.
 *
 * NOTE(review): no synchronization around the cached pointer is visible in
 * this function - confirm that callers serialize the first call.
 */
static transfer_control_stub_type gen_transfer_control_stub()
{
    // Cached entry point; non-NULL once the stub has been generated.
    static transfer_control_stub_type addr = NULL;

    if (addr) {
        return addr;
    }

    // Upper bound on the stub size; checked by the assert after emission.
    const int STUB_SIZE = 255;
    char * stub = (char *)malloc_fixed_code_for_jit(STUB_SIZE,
        DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_COLD, CAA_Allocate);
    // Emission cursor: every emitter call returns the next free byte.
    char * ss = stub;
#ifndef NDEBUG
    // Debug builds pre-fill the buffer with breakpoints.
    memset(stub, 0xcc /*int 3*/, STUB_SIZE);
#endif

    //
    // ************* LOW LEVEL DEPENDENCY! ***************
    // This code sequence must be atomic.  The "atomicity" effect is achieved by
    // changing the rsp at the very end of the sequence.

    // rdx holds the pointer to the stack iterator
#if defined (PLATFORM_POSIX) // RDI holds 1st parameter on Linux
    ss = mov(ss, rdx_opnd, rdi_opnd);
#else // RCX holds 1st parameter on Windows
    ss = mov(ss, rdx_opnd, rcx_opnd);
#endif

    // Restore general registers.
    // Per the listing at the end of this function, get_reg loads the pointer
    // stored at the given context offset and then dereferences it; when the
    // fifth argument is true the dereference is skipped for a null pointer.
    ss = get_reg(ss, rbp_opnd, rdx_reg, CONTEXT_OFFSET(p_rbp), false);
    ss = get_reg(ss, rbx_opnd, rdx_reg, CONTEXT_OFFSET(p_rbx), true);
    ss = get_reg(ss, r12_opnd, rdx_reg, CONTEXT_OFFSET(p_r12), true);
    ss = get_reg(ss, r13_opnd, rdx_reg, CONTEXT_OFFSET(p_r13), true);
    ss = get_reg(ss, r14_opnd, rdx_reg, CONTEXT_OFFSET(p_r14), true);
    ss = get_reg(ss, r15_opnd, rdx_reg, CONTEXT_OFFSET(p_r15), true);
    ss = get_reg(ss, rsi_opnd, rdx_reg, CONTEXT_OFFSET(p_rsi), true);
    ss = get_reg(ss, rdi_opnd, rdx_reg, CONTEXT_OFFSET(p_rdi), true);
    ss = get_reg(ss, r8_opnd,  rdx_reg, CONTEXT_OFFSET(p_r8),  true);
    ss = get_reg(ss, r9_opnd,  rdx_reg, CONTEXT_OFFSET(p_r9),  true);
    ss = get_reg(ss, r10_opnd, rdx_reg, CONTEXT_OFFSET(p_r10), true);
    ss = get_reg(ss, r11_opnd, rdx_reg, CONTEXT_OFFSET(p_r11), true);

    // Get the new RSP (rax is a temporary here; its own value is restored below)
    M_Base_Opnd saved_rsp(rdx_reg, CONTEXT_OFFSET(rsp));
    ss = mov(ss, rax_opnd, saved_rsp);
    // Store it over return address for future use
    ss = mov(ss, M_Base_Opnd(rsp_reg, 0), rax_opnd);
    // Get the new RIP (rcx is a temporary here; its own value is restored below)
    ss = get_reg(ss, rcx_opnd, rdx_reg, CONTEXT_OFFSET(p_rip), false);
    // Store RIP to [<new RSP> - 136] to preserve 128 bytes under RSP
    // which are 'reserved' on Linux
    ss = mov(ss, M_Base_Opnd(rax_reg, -136), rcx_opnd);

    // rax is no longer needed as a temporary: load its final value
    ss = get_reg(ss, rax_opnd, rdx_reg, CONTEXT_OFFSET(p_rax), true);

    // Restore processor flags: load the saved 16-bit flags word and skip the
    // whole merge sequence when it is zero.
    ss = movzx(ss, rcx_opnd, M_Base_Opnd(rdx_reg, CONTEXT_OFFSET(eflags)), size_16);
    ss = test(ss, rcx_opnd, rcx_opnd);
    // Emitted with a zero displacement; the real one is patched in below.
    ss = branch8(ss, Condition_Z, Imm_Opnd(size_8, 0));
    // The displacement is the last byte emitted by branch8.
    char* patch_offset = ((char*)ss) - 1; // Store location for jump patch
    *ss++ = (char)0x9C; // PUSHFQ
    // Current flags now sit on top of the stack: clear the bits outside
    // FLG_CLEAR_MASK, keep only FLG_SET_MASK bits of the saved word, OR them in.
    M_Base_Opnd sflags(rsp_reg, 0);
    ss = alu(ss, and_opc, sflags, Imm_Opnd(size_32,FLG_CLEAR_MASK), size_32);
    ss = alu(ss, and_opc, rcx_opnd, Imm_Opnd(size_32,FLG_SET_MASK), size_32);
    ss = alu(ss, or_opc, sflags, rcx_opnd, size_32);
    *ss++ = (char)0x9D; // POPFQ
    // Patch conditional jump: distance from the byte following the
    // displacement to the current emission position.
    POINTER_SIZE_SINT offset =
        (POINTER_SIZE_SINT)ss - (POINTER_SIZE_SINT)patch_offset - 1;
    *patch_offset = (char)offset;

    // rcx and rdx go last because they served as temporary / context base.
    // Per the listing below, the extra trailing 'true' makes get_reg wrap its
    // null test in pushfq/popfq, so the flags restored above are not disturbed.
    ss = get_reg(ss, rcx_opnd, rdx_reg, CONTEXT_OFFSET(p_rcx), true, true);
    ss = get_reg(ss, rdx_opnd, rdx_reg, CONTEXT_OFFSET(p_rdx), true, true);

    // Setup stack pointer to previously saved value
    ss = mov(ss, rsp_opnd, M_Base_Opnd(rsp_reg, 0));

    // Jump to address stored to [<new RSP> - 136]
    ss = jump(ss, M_Base_Opnd(rsp_reg, -136));

    addr = (transfer_control_stub_type)stub;
    // Sanity check: the emitted code must fit in the allocated buffer.
    assert(ss-stub <= STUB_SIZE);

    /*
       The following code will be generated
       (first instruction shown for the non-POSIX case; under PLATFORM_POSIX
       it is "mov rdx,rdi"):

        mov         rdx,rcx
        mov         rbp,qword ptr [rdx+10h]
        mov         rbp,qword ptr [rbp]
        mov         rbx,qword ptr [rdx+20h]
        test        rbx,rbx
        je          __label1__
        mov         rbx,qword ptr [rbx]
__label1__
        ; .... The same for r12,r13,r14,r15,rsi,rdi,r8,r9,r10
        mov         r11,qword ptr [rdx+88h]
        test        r11,r11
        je          __label11__
        mov         r11,qword ptr [r11]
__label11__
        mov         rax,qword ptr [rdx+8]
        mov         qword ptr [rsp],rax
        mov         rcx,qword ptr [rdx+18h]
        mov         rcx,qword ptr [rcx]
        mov         qword ptr [rax-88h],rcx
        mov         rax,qword ptr [rdx+48h]
        test        rax,rax
        je          __label12__
        mov         rax,qword ptr [rax]
__label12__
        movzx       rcx,word ptr [rdx+90h]
        test        rcx,rcx
        je          __label13__
        pushfq
        and         dword ptr [rsp], 0x003F7202
        and         ecx, 0x00000CD5
        or          dword ptr [rsp], ecx
        popfq
__label13__
        mov         rcx,qword ptr [rdx+50h]
        pushfq
        test        rcx,rcx
        je          __label14__
        mov         rcx,qword ptr [rcx]
__label14__
        popfq
        mov         rdx,qword ptr [rdx+58h]
        pushfq
        test        rdx,rdx
        je          __label15__
        mov         rdx,qword ptr [rdx]
__label15__
        popfq
        mov         rsp,qword ptr [rsp]
        jmp         qword ptr [rsp-88h]
    */

    DUMP_STUB(stub, "getaddress__transfer_control", ss-stub);

    return addr;
}
struct code encode(struct instruction instr) { switch (instr.opcode) { case INSTR_ADD: return add(instr.optype, instr.source, instr.dest); case INSTR_NOT: return not(instr.optype, instr.source); case INSTR_MUL: return mul(instr.optype, instr.source); case INSTR_XOR: return xor(instr.optype, instr.source, instr.dest); case INSTR_DIV: return encode_div(instr.optype, instr.source); case INSTR_AND: return and(instr.optype, instr.source, instr.dest); case INSTR_OR: return or(instr.optype, instr.source, instr.dest); case INSTR_SHL: return shl(instr.optype, instr.source, instr.dest); case INSTR_SHR: return shr(instr.optype, instr.source, instr.dest); case INSTR_SAR: return sar(instr.optype, instr.source, instr.dest); case INSTR_CALL: return call(instr.optype, instr.source); case INSTR_CMP: return cmp(instr.optype, instr.source, instr.dest); case INSTR_MOV: return mov(instr.optype, instr.source, instr.dest); case INSTR_MOVSX: return movsx(instr.optype, instr.source, instr.dest); case INSTR_MOVZX: return movzx(instr.optype, instr.source, instr.dest); case INSTR_MOVAPS: return movaps(instr.optype, instr.source, instr.dest); case INSTR_PUSH: return push(instr.optype, instr.source); case INSTR_SUB: return sub(instr.optype, instr.source, instr.dest); case INSTR_LEA: return lea(instr.optype, instr.source, instr.dest); case INSTR_LEAVE: return leave(); case INSTR_REP_MOVSQ: assert(instr.optype == OPT_NONE); return rep_movsq(); case INSTR_RET: return ret(); case INSTR_JMP: return jmp(instr.optype, instr.source); case INSTR_JA: return jcc(instr.optype, TEST_A, instr.source); case INSTR_JG: return jcc(instr.optype, TEST_G, instr.source); case INSTR_JZ: return jcc(instr.optype, TEST_Z, instr.source); case INSTR_JAE: return jcc(instr.optype, TEST_AE, instr.source); case INSTR_JGE: return jcc(instr.optype, TEST_GE, instr.source); case INSTR_SETZ: return setcc(instr.optype, TEST_Z, instr.source); case INSTR_SETA: return setcc(instr.optype, TEST_A, instr.source); case INSTR_SETG: return 
setcc(instr.optype, TEST_G, instr.source); case INSTR_SETAE: return setcc(instr.optype, TEST_AE, instr.source); case INSTR_SETGE: return setcc(instr.optype, TEST_GE, instr.source); case INSTR_TEST: return test(instr.optype, instr.source, instr.dest); default: return nop(); } }