void channel_loop(body_t body, tail_t tail) {
        size_t num_loops = num_c16_blocks_ / unroll_regs_;
        size_t loop_tail = num_c16_blocks_ - num_loops * unroll_regs_;

        mov(reg_coff_s8, reg_soff);
        xor_(reg_coff_f32, reg_coff_f32);
        if (num_loops) {
            xor_(reg_tmp, reg_tmp);
            add(reg_tmp, c_in_xmm_ * unroll_regs_);

            Label c_loop;
            L(c_loop); {

                body(unroll_regs_);

                add(reg_coff_s8, c_in_xmm_ * unroll_regs_);
                add(reg_coff_f32, sizeof(float) * c_in_xmm_ * unroll_regs_);
                add(reg_tmp, c_in_xmm_ * unroll_regs_);
                cmp(reg_tmp, reg_coff_max);
                jle(c_loop);
            }
        }

        if (loop_tail)
            body(loop_tail);

        if (c_tail_) {
            add(reg_coff_s8, c_in_xmm_ * loop_tail);
            add(reg_coff_f32, sizeof(float) * c_in_xmm_ * loop_tail);

            tail();
        }
    }
void GSDrawScanlineCodeGenerator::Generate()
{
	// TODO: on linux/mac rsi, rdi, xmm6-xmm15 are all caller saved

	push(rbx);
	push(rsi);
	push(rdi);
	push(rbp);
	push(r12);
	push(r13);

	sub(rsp, 8 + 10 * 16);
	
	for(int i = 6; i < 16; i++)
	{
		vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i));
	}

	mov(r10, (size_t)&m_test[0]);
	mov(r11, (size_t)&m_local);
	mov(r12, (size_t)m_local.gd);
	mov(r13, (size_t)m_local.gd->vm);

	Init();

	// rcx = steps
	// rsi = fza_base
	// rdi = fza_offset
	// r10 = &m_test[0]
	// r11 = &m_local
	// r12 = m_local->gd
	// r13 = m_local->gd.vm
	// xmm7 = vf (sprite && ltf)
	// xmm8 = z
	// xmm9 = f
	// xmm10 = s
	// xmm11 = t
	// xmm12 = q
	// xmm13 = rb
	// xmm14 = ga 
	// xmm15 = test

	if(!m_sel.edge)
	{
		align(16);
	}

L("loop");

	TestZ(xmm5, xmm6);

	// ebp = za

	if(m_sel.mmin)
	{
		SampleTextureLOD();
	}
	else
	{
		SampleTexture();
	}

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga

	AlphaTFX();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga

	ReadMask();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	TestAlpha();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	ColorTFX();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	Fog();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	ReadFrame();

	// ebx = fa
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	TestDestAlpha();

	// ebx = fa
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	WriteMask();

	// ebx = fa
	// edx = fzm
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	WriteZBuf();

	// ebx = fa
	// edx = fzm
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm6 = fd

	AlphaBlend();

	// ebx = fa
	// edx = fzm
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm6 = fd

	WriteFrame();

L("step");

	// if(steps <= 0) break;

	if(!m_sel.edge)
	{
		test(rcx, rcx);

		jle("exit", T_NEAR);

		Step();

		jmp("loop", T_NEAR);
	}

L("exit");

	for(int i = 6; i < 16; i++)
	{
		vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]);
	}

	add(rsp, 8 + 10 * 16);

	pop(r13);
	pop(r12);
	pop(rbp);
	pop(rdi);
	pop(rsi);
	pop(rbx);

	ret();
}
Ejemplo n.º 3
0
void GPUDrawScanlineCodeGenerator::Generate()
{
	push(esi);
	push(edi);

	Init();

	align(16);

L("loop");

	// GSVector4i test = m_test[7 + (steps & (steps >> 31))];

	mov(edx, ecx);
	sar(edx, 31);
	and(edx, ecx);
	shl(edx, 4);

	movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);

	// movdqu(xmm1, ptr[edi]);

	movq(xmm1, qword[edi]);
	movhps(xmm1, qword[edi + 8]);

	// ecx = steps
	// esi = tex (tme)
	// edi = fb
	// xmm1 = fd
	// xmm2 = s
	// xmm3 = t
	// xmm4 = r
	// xmm5 = g
	// xmm6 = b
	// xmm7 = test

	TestMask();

	SampleTexture();

	// xmm1 = fd
	// xmm3 = a
	// xmm4 = r
	// xmm5 = g
	// xmm6 = b
	// xmm7 = test
	// xmm0, xmm2 = free

	ColorTFX();

	AlphaBlend();

	Dither();

	WriteFrame();

L("step");

	// if(steps <= 0) break;

	test(ecx, ecx);
	jle("exit", T_NEAR);

	Step();

	jmp("loop", T_NEAR);

L("exit");

	pop(edi);
	pop(esi);

	ret(8);
}
void GSDrawScanlineCodeGenerator::Generate()
{
    push(ebx);
    push(esi);
    push(edi);
    push(ebp);

    const int params = 16;

    Init(params);

    if(!m_sel.edge)
    {
        align(16);
    }

    L("loop");

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // xmm0 = z/zi
    // xmm2 = u (tme)
    // xmm3 = v (tme)
    // xmm5 = rb (!tme)
    // xmm6 = ga (!tme)
    // xmm7 = test

    bool tme = m_sel.tfx != TFX_NONE;

    TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // - xmm0
    // xmm2 = u (tme)
    // xmm3 = v (tme)
    // xmm5 = rb (!tme)
    // xmm6 = ga (!tme)
    // xmm7 = test

    SampleTexture();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // - xmm2
    // - xmm3
    // - xmm4
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    AlphaTFX();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    if(m_sel.fwrite)
    {
        movdqa(xmm3, xmmword[&m_env.fm]);
    }

    if(m_sel.zwrite)
    {
        movdqa(xmm4, xmmword[&m_env.zm]);
    }

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    TestAlpha();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    ColorTFX();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    Fog();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    ReadFrame();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = fd
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    TestDestAlpha();

    // fm |= test;
    // zm |= test;

    if(m_sel.fwrite)
    {
        por(xmm3, xmm7);
    }

    if(m_sel.zwrite)
    {
        por(xmm4, xmm7);
    }

    // int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();

    pcmpeqd(xmm1, xmm1);

    if(m_sel.fwrite && m_sel.zwrite)
    {
        movdqa(xmm0, xmm1);
        pcmpeqd(xmm1, xmm3);
        pcmpeqd(xmm0, xmm4);
        packssdw(xmm1, xmm0);
    }
    else if(m_sel.fwrite)
    {
        pcmpeqd(xmm1, xmm3);
        packssdw(xmm1, xmm1);
    }
    else if(m_sel.zwrite)
    {
        pcmpeqd(xmm1, xmm4);
        packssdw(xmm1, xmm1);
    }

    pmovmskb(edx, xmm1);
    not(edx);

    // ebx = fa
    // ecx = steps
    // edx = fzm
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = fd
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga

    WriteZBuf();

    // ebx = fa
    // ecx = steps
    // edx = fzm
    // esi = fzbr
    // edi = fzbc
    // - ebp
    // xmm2 = fd
    // xmm3 = fm
    // - xmm4
    // xmm5 = rb
    // xmm6 = ga

    AlphaBlend();

    // ebx = fa
    // ecx = steps
    // edx = fzm
    // esi = fzbr
    // edi = fzbc
    // xmm2 = fd
    // xmm3 = fm
    // xmm5 = rb
    // xmm6 = ga

    WriteFrame(params);

    L("step");

    // if(steps <= 0) break;

    if(!m_sel.edge)
    {
        test(ecx, ecx);
        jle("exit", T_NEAR);

        Step();

        jmp("loop", T_NEAR);
    }

    L("exit");

    pop(ebp);
    pop(edi);
    pop(esi);
    pop(ebx);

    ret(8);
}
Ejemplo n.º 5
0
int encode_op(char *opcode, char *op_data)
{
	int rd,rs,rt,imm,funct,shaft,target;
	char tmp[256];
	const char *fi = "%s %d";
	const char *fg = "%s %%g%d";
	const char *ff = "%s %%f%d";
	const char *fl = "%s %s";
	const char *fgi = "%s %%g%d, %d";
	const char *fgl = "%s %%g%d, %s";
	const char *fgg = "%s %%g%d, %%g%d";
	const char *fggl = "%s %%g%d, %%g%d, %s";
	const char *fggi = "%s %%g%d, %%g%d, %d";
	const char *fggg = "%s %%g%d, %%g%d, %%g%d";
	const char *fff = "%s %%f%d, %%f%d";
	const char *fgf = "%s %%g%d, %%f%d";
	const char *ffg = "%s %%f%d, %%g%d";
	const char *fffl = "%s %%f%d, %%f%d, %s";
	const char *ffff = "%s %%f%d, %%f%d, %%f%d";
	const char *ffgi = "%s %%f%d, %%g%d, %d";
	const char *ffgg = "%s %%f%d, %%g%d, %%g%d";
	char lname[256];

	shaft = funct = target = 0;

	if(strcmp(opcode, "mvhi") == 0){
		if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3)
		    return mvhi(rs,0,imm);
	}
	if(strcmp(opcode, "mvlo") == 0){
		if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3)
		    return mvlo(rs,0,imm);
	}
	if(strcmp(opcode, "add") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return add(rs,rt,rd,0);
	}
	if(strcmp(opcode, "nor") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return nor(rs,rt,rd,0);
	}
	if(strcmp(opcode, "sub") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return sub(rs,rt,rd,0);
	}
	if(strcmp(opcode, "mul") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return mul(rs,rt,rd,0);
	}
	if(strcmp(opcode, "addi") == 0){
		if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4)
		    return addi(rs,rt,imm);
	}
	if(strcmp(opcode, "subi") == 0){
		if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4)
		    return subi(rs,rt,imm);
	}
	if(strcmp(opcode, "muli") == 0){
		if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4)
		    return muli(rs,rt,imm);
	}
	if(strcmp(opcode, "input") == 0){
		if(sscanf(op_data, fg, tmp, &rd) == 2)
		    return input(0,0,rd,0);
	}
	if(strcmp(opcode, "inputw") == 0){
		if(sscanf(op_data, fg, tmp, &rd) == 2)
		    return inputw(0,0,rd,0);
	}
	if(strcmp(opcode, "inputf") == 0){
		if(sscanf(op_data, ff, tmp, &rd) == 2)
		    return inputf(0,0,rd,0);
	}
	if(strcmp(opcode, "output") == 0){
		if(sscanf(op_data, fg, tmp, &rs) == 2)
		    return output(rs,0,0,0);
	}
	if(strcmp(opcode, "outputw") == 0){
		if(sscanf(op_data, fg, tmp, &rs) == 2)
		    return outputw(rs,0,0,0);
	}
	if(strcmp(opcode, "outputf") == 0){
		if(sscanf(op_data, ff, tmp, &rs) == 2)
		    return outputf(rs,0,0,0);
	}
	if(strcmp(opcode, "and") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return _and(rs,rt,rd,0);
	}
	if(strcmp(opcode, "or") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return _or(rs,rt,rd,0);
	}
	if(strcmp(opcode, "sll") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return sll(rs,rt,rd,0);
	}
	if(strcmp(opcode, "srl") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return srl(rs,rt,rd,0);
	}
	if(strcmp(opcode, "slli") == 0){
		if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4)
		    return slli(rs,rt,imm);
	}
	if(strcmp(opcode, "srli") == 0){
		if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4)
		    return srli(rs,rt,imm);
	}
	if(strcmp(opcode, "b") == 0){
		if(sscanf(op_data, fg, tmp, &rs) == 2)
		    return b(rs,0,0,0);
	}
	if(strcmp(opcode, "jmp") == 0){
		if(sscanf(op_data, fl, tmp, lname) == 2) {
			strcpy(label_name[label_cnt],lname);
		    return jmp(label_cnt++);
		}
	}
	if(strcmp(opcode, "jeq") == 0){
		if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) {
			strcpy(label_name[label_cnt],lname);
		    return jeq(rs,rt,label_cnt++);
		}
	}
	if(strcmp(opcode, "jne") == 0){
		if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) {
			strcpy(label_name[label_cnt],lname);
		    return jne(rs,rt,label_cnt++);
		}
	}
	if(strcmp(opcode, "jlt") == 0){
		if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) {
			strcpy(label_name[label_cnt],lname);
		    return jlt(rs,rt,label_cnt++);
		}
	}
	if(strcmp(opcode, "jle") == 0){
		if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) {
			strcpy(label_name[label_cnt],lname);
		    return jle(rs,rt,label_cnt++);
		}
	}
	if(strcmp(opcode, "call") == 0){
		if(sscanf(op_data, fl, tmp, lname) == 2)  {
			strcpy(label_name[label_cnt],lname);
		    return call(label_cnt++);
		}
	}
	if(strcmp(opcode, "callR") == 0){
		if(sscanf(op_data, fg, tmp, &rs) == 2)
		    return callr(rs,0,0,0);
	}
	if(strcmp(opcode, "return") == 0){
		    return _return(0);
	}
	if(strcmp(opcode, "ld") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return ld(rs,rt,rd,0);
	}
	if(strcmp(opcode, "ldi") == 0){
		if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4)
		    return ldi(rs,rt,imm);
	}
	if(strcmp(opcode, "ldlr") == 0){
		if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3)
		    return ldlr(rs,0,imm);
	}
	if(strcmp(opcode, "fld") == 0){
		if(sscanf(op_data, ffgg, tmp, &rd, &rs,&rt) == 4)
		    return fld(rs,rt,rd,0);
	}
	if(strcmp(opcode, "st") == 0){
		if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4)
		    return st(rs,rt,rd,0);
	}
	if(strcmp(opcode, "sti") == 0){
		if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4)
		    return sti(rs,rt,imm);
	}
	if(strcmp(opcode, "stlr") == 0){
		if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3)
		    return stlr(rs,0,imm);
	}
	if(strcmp(opcode, "fst") == 0){
		if(sscanf(op_data, ffgg, tmp, &rd, &rs,&rt) == 4)
		    return fst(rs,rt,rd,0);
	}
	if(strcmp(opcode, "fadd") == 0){
		if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4)
		    return fadd(rs,rt,rd,0);
	}
	if(strcmp(opcode, "fsub") == 0){
		if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4)
		    return fsub(rs,rt,rd,0);
	}
	if(strcmp(opcode, "fmul") == 0){
		if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4)
		    return fmul(rs,rt,rd,0);
	}
	if(strcmp(opcode, "fdiv") == 0){
		if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4)
		    return fdiv(rs,rt,rd,0);
	}
	if(strcmp(opcode, "fsqrt") == 0){
		if(sscanf(op_data, fff, tmp, &rd, &rs) == 3)
		    return fsqrt(rs,0,rd,0);
	}
	if(strcmp(opcode, "fabs") == 0){
		if(sscanf(op_data, fff, tmp, &rd, &rs) == 3)
		    return _fabs(rs,0,rd,0);
	}
	if(strcmp(opcode, "fmov") == 0){
		if(sscanf(op_data, fff, tmp, &rd, &rs) == 3)
		    return fmov(rs,0,rd,0);
	}
	if(strcmp(opcode, "fneg") == 0){
		if(sscanf(op_data, fff, tmp, &rd, &rs) == 3)
		    return fneg(rs,0,rd,0);
	}
	if(strcmp(opcode, "fldi") == 0){
		if(sscanf(op_data, ffgi, tmp, &rt, &rs, &imm) == 4)
		    return fldi(rs,rt,imm);
	}
	if(strcmp(opcode, "fsti") == 0){
		if(sscanf(op_data, ffgi, tmp, &rt, &rs, &imm) == 4)
		    return fsti(rs,rt,imm);
	}
	if(strcmp(opcode, "fjeq") == 0){
		if(sscanf(op_data, fffl, tmp, &rs, &rt, lname) == 4) {
			strcpy(label_name[label_cnt],lname);
		    return fjeq(rs,rt,label_cnt++);
		}
	}
	if(strcmp(opcode, "fjlt") == 0){
		if(sscanf(op_data, fffl, tmp, &rs, &rt, lname) == 4) {
			strcpy(label_name[label_cnt],lname);
		    return fjlt(rs,rt,label_cnt++);
		}
	}
	if(strcmp(opcode, "halt") == 0){
		    return halt(0,0,0,0);
	}
	if(strcmp(opcode, "setL") == 0){
		if(sscanf(op_data, fgl, tmp, &rd, lname) == 3) {
			strcpy(label_name[label_cnt],lname);
		    return setl(0,rd,label_cnt++);
		}
	}
	if(strcmp(opcode, "padd") == 0){
		if(sscanf(op_data, fgi, tmp, &rt, &imm) == 3) {
		    return padd(0,rt,imm);
		}
	}
	if(strcmp(opcode, "link") == 0){
		if(sscanf(op_data, fi, tmp, &imm) == 2) {
		    return link(0,0,imm);
		}
	}
	if(strcmp(opcode, "movlr") == 0){
		return movlr(0,0,0,0);
	}
	if(strcmp(opcode, "btmplr") == 0){
		return btmplr(0,0,0,0);
	}
	/*
	if(strcmp(opcode, "padd") == 0){
		if(sscanf(op_data, fgg, tmp, &rd, &rt) == 3) {
		    return padd(0,rt,d,0);
		}
	}
	*/

	return -1;
}