void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
{
	const Address& src = m_sel.tlu ? ptr[r12 + rax * 4 + offsetof(GSScanlineGlobalData, clut)] : ptr[rbx + rax * 4];

	if(i == 0) vmovd(eax, addr);
	else vpextrd(eax, addr, i);

	if(m_sel.tlu) movzx(rax, byte[rbx + rax]);

	if(i == 0) vmovd(dst, src);
	else vpinsrd(dst, src, i);
}
void GPUDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr)
{
	for(int i = 0; i < 8; i++)
	{
		pextrw(eax, addr, (uint8)i);

		if(m_sel.tlu) movzx(eax, byte[esi + eax]);

		const Address& src = m_sel.tlu ? ptr[edx + eax * 2] : ptr[esi + eax * 2];

		if(i == 0) movd(dst, src);
		else pinsrw(dst, src, (uint8)i);
	}
}
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
{
    if(!m_cpu.has(util::Cpu::tSSE41) && i > 0)
    {
        ASSERT(0);
    }

    if(i == 0) movd(eax, addr);
    else pextrd(eax, addr, i);

    if(m_sel.tlu) movzx(eax, byte[ebx + eax]);

    const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4];

    if(i == 0) movd(dst, src);
    else pinsrd(dst, src, i);
}
static transfer_control_stub_type gen_transfer_control_stub()
{
    static transfer_control_stub_type addr = NULL;

    if (addr) {
        return addr;
    }

    const int STUB_SIZE = 255;
    char * stub = (char *)malloc_fixed_code_for_jit(STUB_SIZE,
        DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_COLD, CAA_Allocate);
    char * ss = stub;
#ifndef NDEBUG
    memset(stub, 0xcc /*int 3*/, STUB_SIZE);
#endif

    //
    // ************* LOW LEVEL DEPENDENCY! ***************
    // This code sequence must be atomic.  The "atomicity" effect is achieved by
    // changing the rsp at the very end of the sequence.

    // rdx holds the pointer to the stack iterator
#if defined (PLATFORM_POSIX) // RDI holds 1st parameter on Linux
    ss = mov(ss, rdx_opnd, rdi_opnd);
#else // RCX holds 1st parameter on Windows
    ss = mov(ss, rdx_opnd, rcx_opnd);
#endif

    // Restore general registers
    ss = get_reg(ss, rbp_opnd, rdx_reg, CONTEXT_OFFSET(p_rbp), false);
    ss = get_reg(ss, rbx_opnd, rdx_reg, CONTEXT_OFFSET(p_rbx), true);
    ss = get_reg(ss, r12_opnd, rdx_reg, CONTEXT_OFFSET(p_r12), true);
    ss = get_reg(ss, r13_opnd, rdx_reg, CONTEXT_OFFSET(p_r13), true);
    ss = get_reg(ss, r14_opnd, rdx_reg, CONTEXT_OFFSET(p_r14), true);
    ss = get_reg(ss, r15_opnd, rdx_reg, CONTEXT_OFFSET(p_r15), true);
    ss = get_reg(ss, rsi_opnd, rdx_reg, CONTEXT_OFFSET(p_rsi), true);
    ss = get_reg(ss, rdi_opnd, rdx_reg, CONTEXT_OFFSET(p_rdi), true);
    ss = get_reg(ss, r8_opnd,  rdx_reg, CONTEXT_OFFSET(p_r8),  true);
    ss = get_reg(ss, r9_opnd,  rdx_reg, CONTEXT_OFFSET(p_r9),  true);
    ss = get_reg(ss, r10_opnd, rdx_reg, CONTEXT_OFFSET(p_r10), true);
    ss = get_reg(ss, r11_opnd, rdx_reg, CONTEXT_OFFSET(p_r11), true);

    // Get the new RSP
    M_Base_Opnd saved_rsp(rdx_reg, CONTEXT_OFFSET(rsp));
    ss = mov(ss, rax_opnd, saved_rsp);
    // Store it over return address for future use
    ss = mov(ss, M_Base_Opnd(rsp_reg, 0), rax_opnd);
    // Get the new RIP
    ss = get_reg(ss, rcx_opnd, rdx_reg, CONTEXT_OFFSET(p_rip), false);
    // Store RIP to [<new RSP> - 136] to preserve 128 bytes under RSP
    // which are 'reserved' on Linux
    ss = mov(ss,  M_Base_Opnd(rax_reg, -136), rcx_opnd);

    ss = get_reg(ss, rax_opnd, rdx_reg, CONTEXT_OFFSET(p_rax), true);

    // Restore processor flags
    ss = movzx(ss, rcx_opnd,  M_Base_Opnd(rdx_reg, CONTEXT_OFFSET(eflags)), size_16);
    ss = test(ss, rcx_opnd, rcx_opnd);
    ss = branch8(ss, Condition_Z,  Imm_Opnd(size_8, 0));
    char* patch_offset = ((char*)ss) - 1; // Store location for jump patch
    *ss++ = (char)0x9C; // PUSHFQ
    M_Base_Opnd sflags(rsp_reg, 0);
    ss = alu(ss, and_opc, sflags, Imm_Opnd(size_32,FLG_CLEAR_MASK), size_32);
    ss = alu(ss, and_opc, rcx_opnd, Imm_Opnd(size_32,FLG_SET_MASK), size_32);
    ss = alu(ss, or_opc, sflags, rcx_opnd, size_32);
    *ss++ = (char)0x9D; // POPFQ
    // Patch conditional jump
    POINTER_SIZE_SINT offset =
        (POINTER_SIZE_SINT)ss - (POINTER_SIZE_SINT)patch_offset - 1;
    *patch_offset = (char)offset;

    ss = get_reg(ss, rcx_opnd, rdx_reg, CONTEXT_OFFSET(p_rcx), true, true);
    ss = get_reg(ss, rdx_opnd, rdx_reg, CONTEXT_OFFSET(p_rdx), true, true);

    // Setup stack pointer to previously saved value
    ss = mov(ss,  rsp_opnd,  M_Base_Opnd(rsp_reg, 0));

    // Jump to address stored to [<new RSP> - 136]
    ss = jump(ss,  M_Base_Opnd(rsp_reg, -136));

    addr = (transfer_control_stub_type)stub;
    assert(ss-stub <= STUB_SIZE);

    /*
       The following code will be generated:

        mov         rdx,rcx
        mov         rbp,qword ptr [rdx+10h]
        mov         rbp,qword ptr [rbp]
        mov         rbx,qword ptr [rdx+20h]
        test        rbx,rbx
        je          __label1__
        mov         rbx,qword ptr [rbx]
__label1__
        ; .... The same for r12,r13,r14,r15,rsi,rdi,r8,r9,r10
        mov         r11,qword ptr [rdx+88h]
        test        r11,r11
        je          __label11__
        mov         r11,qword ptr [r11]
__label11__
        mov         rax,qword ptr [rdx+8]
        mov         qword ptr [rsp],rax
        mov         rcx,qword ptr [rdx+18h]
        mov         rcx,qword ptr [rcx]
        mov         qword ptr [rax-88h],rcx
        mov         rax,qword ptr [rdx+48h]
        test        rax,rax
        je          __label12__
        mov         rax,qword ptr [rax]
__label12__
        movzx       rcx,word ptr [rdx+90h]
        test        rcx,rcx
        je          __label13__
        pushfq
        and         dword ptr [rsp], 0x003F7202
        and         ecx, 0x00000CD5
        or          dword ptr [esp], ecx
        popfq
__label13__
        mov         rcx,qword ptr [rdx+50h]
        pushfq
        test        rcx,rcx
        je          __label14__
        mov         rcx,qword ptr [rcx]
__label14__
        popfq
        mov         rdx,qword ptr [rdx+58h]
        pushfq
        test        rdx,rdx
        je          __label15__
        mov         rdx,qword ptr [rdx]
__label15__
        popfq
        mov         rsp,qword ptr [rsp]
        jmp         qword ptr [rsp-88h]
    */

    DUMP_STUB(stub, "getaddress__transfer_control", ss-stub);

    return addr;
}
Beispiel #5
0
struct code encode(struct instruction instr)
{
    switch (instr.opcode) {
    case INSTR_ADD:
        return add(instr.optype, instr.source, instr.dest);
    case INSTR_NOT:
        return not(instr.optype, instr.source);
    case INSTR_MUL:
        return mul(instr.optype, instr.source);
    case INSTR_XOR:
        return xor(instr.optype, instr.source, instr.dest);
    case INSTR_DIV:
        return encode_div(instr.optype, instr.source);
    case INSTR_AND:
        return and(instr.optype, instr.source, instr.dest);
    case INSTR_OR:
        return or(instr.optype, instr.source, instr.dest);
    case INSTR_SHL:
        return shl(instr.optype, instr.source, instr.dest);
    case INSTR_SHR:
        return shr(instr.optype, instr.source, instr.dest);
    case INSTR_SAR:
        return sar(instr.optype, instr.source, instr.dest);
    case INSTR_CALL:
        return call(instr.optype, instr.source);
    case INSTR_CMP:
        return cmp(instr.optype, instr.source, instr.dest);
    case INSTR_MOV:
        return mov(instr.optype, instr.source, instr.dest);
    case INSTR_MOVSX:
        return movsx(instr.optype, instr.source, instr.dest);
    case INSTR_MOVZX:
        return movzx(instr.optype, instr.source, instr.dest);
    case INSTR_MOVAPS:
        return movaps(instr.optype, instr.source, instr.dest);
    case INSTR_PUSH:
        return push(instr.optype, instr.source);
    case INSTR_SUB:
        return sub(instr.optype, instr.source, instr.dest);
    case INSTR_LEA:
        return lea(instr.optype, instr.source, instr.dest);
    case INSTR_LEAVE:
        return leave();
    case INSTR_REP_MOVSQ:
        assert(instr.optype == OPT_NONE);
        return rep_movsq();
    case INSTR_RET:
        return ret();
    case INSTR_JMP:
        return jmp(instr.optype, instr.source);
    case INSTR_JA:
        return jcc(instr.optype, TEST_A, instr.source);
    case INSTR_JG:
        return jcc(instr.optype, TEST_G, instr.source);
    case INSTR_JZ:
        return jcc(instr.optype, TEST_Z, instr.source);
    case INSTR_JAE:
        return jcc(instr.optype, TEST_AE, instr.source);
    case INSTR_JGE:
        return jcc(instr.optype, TEST_GE, instr.source);
    case INSTR_SETZ:
        return setcc(instr.optype, TEST_Z, instr.source);
    case INSTR_SETA:
        return setcc(instr.optype, TEST_A, instr.source);
    case INSTR_SETG:
        return setcc(instr.optype, TEST_G, instr.source);
    case INSTR_SETAE:
        return setcc(instr.optype, TEST_AE, instr.source);
    case INSTR_SETGE:
        return setcc(instr.optype, TEST_GE, instr.source);
    case INSTR_TEST:
        return test(instr.optype, instr.source, instr.dest);
    default:
        return nop();
    }
}