void channel_loop(body_t body, tail_t tail) { size_t num_loops = num_c16_blocks_ / unroll_regs_; size_t loop_tail = num_c16_blocks_ - num_loops * unroll_regs_; mov(reg_coff_s8, reg_soff); xor_(reg_coff_f32, reg_coff_f32); if (num_loops) { xor_(reg_tmp, reg_tmp); add(reg_tmp, c_in_xmm_ * unroll_regs_); Label c_loop; L(c_loop); { body(unroll_regs_); add(reg_coff_s8, c_in_xmm_ * unroll_regs_); add(reg_coff_f32, sizeof(float) * c_in_xmm_ * unroll_regs_); add(reg_tmp, c_in_xmm_ * unroll_regs_); cmp(reg_tmp, reg_coff_max); jle(c_loop); } } if (loop_tail) body(loop_tail); if (c_tail_) { add(reg_coff_s8, c_in_xmm_ * loop_tail); add(reg_coff_f32, sizeof(float) * c_in_xmm_ * loop_tail); tail(); } }
void GSDrawScanlineCodeGenerator::Generate() { // TODO: on linux/mac rsi, rdi, xmm6-xmm15 are all caller saved push(rbx); push(rsi); push(rdi); push(rbp); push(r12); push(r13); sub(rsp, 8 + 10 * 16); for(int i = 6; i < 16; i++) { vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i)); } mov(r10, (size_t)&m_test[0]); mov(r11, (size_t)&m_local); mov(r12, (size_t)m_local.gd); mov(r13, (size_t)m_local.gd->vm); Init(); // rcx = steps // rsi = fza_base // rdi = fza_offset // r10 = &m_test[0] // r11 = &m_local // r12 = m_local->gd // r13 = m_local->gd.vm // xmm7 = vf (sprite && ltf) // xmm8 = z // xmm9 = f // xmm10 = s // xmm11 = t // xmm12 = q // xmm13 = rb // xmm14 = ga // xmm15 = test if(!m_sel.edge) { align(16); } L("loop"); TestZ(xmm5, xmm6); // ebp = za if(m_sel.mmin) { SampleTextureLOD(); } else { SampleTexture(); } // ebp = za // xmm2 = rb // xmm3 = ga AlphaTFX(); // ebp = za // xmm2 = rb // xmm3 = ga ReadMask(); // ebp = za // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm5 = zm TestAlpha(); // ebp = za // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm5 = zm ColorTFX(); // ebp = za // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm5 = zm Fog(); // ebp = za // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm5 = zm ReadFrame(); // ebx = fa // ebp = za // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm5 = zm // xmm6 = fd TestDestAlpha(); // ebx = fa // ebp = za // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm5 = zm // xmm6 = fd WriteMask(); // ebx = fa // edx = fzm // ebp = za // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm5 = zm // xmm6 = fd WriteZBuf(); // ebx = fa // edx = fzm // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm6 = fd AlphaBlend(); // ebx = fa // edx = fzm // xmm2 = rb // xmm3 = ga // xmm4 = fm // xmm6 = fd WriteFrame(); L("step"); // if(steps <= 0) break; if(!m_sel.edge) { test(rcx, rcx); jle("exit", T_NEAR); Step(); jmp("loop", T_NEAR); } L("exit"); for(int i = 6; i < 16; i++) { vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]); } add(rsp, 8 + 10 * 16); pop(r13); pop(r12); pop(rbp); pop(rdi); pop(rsi); pop(rbx); ret(); }
void GPUDrawScanlineCodeGenerator::Generate() { push(esi); push(edi); Init(); align(16); L("loop"); // GSVector4i test = m_test[7 + (steps & (steps >> 31))]; mov(edx, ecx); sar(edx, 31); and(edx, ecx); shl(edx, 4); movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]); // movdqu(xmm1, ptr[edi]); movq(xmm1, qword[edi]); movhps(xmm1, qword[edi + 8]); // ecx = steps // esi = tex (tme) // edi = fb // xmm1 = fd // xmm2 = s // xmm3 = t // xmm4 = r // xmm5 = g // xmm6 = b // xmm7 = test TestMask(); SampleTexture(); // xmm1 = fd // xmm3 = a // xmm4 = r // xmm5 = g // xmm6 = b // xmm7 = test // xmm0, xmm2 = free ColorTFX(); AlphaBlend(); Dither(); WriteFrame(); L("step"); // if(steps <= 0) break; test(ecx, ecx); jle("exit", T_NEAR); Step(); jmp("loop", T_NEAR); L("exit"); pop(edi); pop(esi); ret(8); }
void GSDrawScanlineCodeGenerator::Generate() { push(ebx); push(esi); push(edi); push(ebp); const int params = 16; Init(params); if(!m_sel.edge) { align(16); } L("loop"); // ecx = steps // esi = fzbr // edi = fzbc // xmm0 = z/zi // xmm2 = u (tme) // xmm3 = v (tme) // xmm5 = rb (!tme) // xmm6 = ga (!tme) // xmm7 = test bool tme = m_sel.tfx != TFX_NONE; TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3); // ecx = steps // esi = fzbr // edi = fzbc // - xmm0 // xmm2 = u (tme) // xmm3 = v (tme) // xmm5 = rb (!tme) // xmm6 = ga (!tme) // xmm7 = test SampleTexture(); // ecx = steps // esi = fzbr // edi = fzbc // ebp = za // - xmm2 // - xmm3 // - xmm4 // xmm5 = rb // xmm6 = ga // xmm7 = test AlphaTFX(); // ecx = steps // esi = fzbr // edi = fzbc // ebp = za // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) // xmm5 = rb // xmm6 = ga // xmm7 = test if(m_sel.fwrite) { movdqa(xmm3, xmmword[&m_env.fm]); } if(m_sel.zwrite) { movdqa(xmm4, xmmword[&m_env.zm]); } // ecx = steps // esi = fzbr // edi = fzbc // ebp = za // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) // xmm3 = fm // xmm4 = zm // xmm5 = rb // xmm6 = ga // xmm7 = test TestAlpha(); // ecx = steps // esi = fzbr // edi = fzbc // ebp = za // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) // xmm3 = fm // xmm4 = zm // xmm5 = rb // xmm6 = ga // xmm7 = test ColorTFX(); // ecx = steps // esi = fzbr // edi = fzbc // ebp = za // xmm3 = fm // xmm4 = zm // xmm5 = rb // xmm6 = ga // xmm7 = test Fog(); // ecx = steps // esi = fzbr // edi = fzbc // ebp = za // xmm3 = fm // xmm4 = zm // xmm5 = rb // xmm6 = ga // xmm7 = test ReadFrame(); // ecx = steps // esi = fzbr // edi = fzbc // ebp = za // xmm2 = fd // xmm3 = fm // xmm4 = zm // xmm5 = rb // xmm6 = ga // xmm7 = test TestDestAlpha(); // fm |= test; // zm |= test; if(m_sel.fwrite) { por(xmm3, xmm7); } if(m_sel.zwrite) { por(xmm4, xmm7); } // int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask(); pcmpeqd(xmm1, xmm1); if(m_sel.fwrite && m_sel.zwrite) { movdqa(xmm0, xmm1); pcmpeqd(xmm1, xmm3); pcmpeqd(xmm0, xmm4); packssdw(xmm1, xmm0); } else if(m_sel.fwrite) { pcmpeqd(xmm1, xmm3); packssdw(xmm1, xmm1); } else if(m_sel.zwrite) { pcmpeqd(xmm1, xmm4); packssdw(xmm1, xmm1); } pmovmskb(edx, xmm1); not(edx); // ebx = fa // ecx = steps // edx = fzm // esi = fzbr // edi = fzbc // ebp = za // xmm2 = fd // xmm3 = fm // xmm4 = zm // xmm5 = rb // xmm6 = ga WriteZBuf(); // ebx = fa // ecx = steps // edx = fzm // esi = fzbr // edi = fzbc // - ebp // xmm2 = fd // xmm3 = fm // - xmm4 // xmm5 = rb // xmm6 = ga AlphaBlend(); // ebx = fa // ecx = steps // edx = fzm // esi = fzbr // edi = fzbc // xmm2 = fd // xmm3 = fm // xmm5 = rb // xmm6 = ga WriteFrame(params); L("step"); // if(steps <= 0) break; if(!m_sel.edge) { test(ecx, ecx); jle("exit", T_NEAR); Step(); jmp("loop", T_NEAR); } L("exit"); pop(ebp); pop(edi); pop(esi); pop(ebx); ret(8); }
int encode_op(char *opcode, char *op_data) { int rd,rs,rt,imm,funct,shaft,target; char tmp[256]; const char *fi = "%s %d"; const char *fg = "%s %%g%d"; const char *ff = "%s %%f%d"; const char *fl = "%s %s"; const char *fgi = "%s %%g%d, %d"; const char *fgl = "%s %%g%d, %s"; const char *fgg = "%s %%g%d, %%g%d"; const char *fggl = "%s %%g%d, %%g%d, %s"; const char *fggi = "%s %%g%d, %%g%d, %d"; const char *fggg = "%s %%g%d, %%g%d, %%g%d"; const char *fff = "%s %%f%d, %%f%d"; const char *fgf = "%s %%g%d, %%f%d"; const char *ffg = "%s %%f%d, %%g%d"; const char *fffl = "%s %%f%d, %%f%d, %s"; const char *ffff = "%s %%f%d, %%f%d, %%f%d"; const char *ffgi = "%s %%f%d, %%g%d, %d"; const char *ffgg = "%s %%f%d, %%g%d, %%g%d"; char lname[256]; shaft = funct = target = 0; if(strcmp(opcode, "mvhi") == 0){ if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3) return mvhi(rs,0,imm); } if(strcmp(opcode, "mvlo") == 0){ if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3) return mvlo(rs,0,imm); } if(strcmp(opcode, "add") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return add(rs,rt,rd,0); } if(strcmp(opcode, "nor") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return nor(rs,rt,rd,0); } if(strcmp(opcode, "sub") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return sub(rs,rt,rd,0); } if(strcmp(opcode, "mul") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return mul(rs,rt,rd,0); } if(strcmp(opcode, "addi") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return addi(rs,rt,imm); } if(strcmp(opcode, "subi") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return subi(rs,rt,imm); } if(strcmp(opcode, "muli") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return muli(rs,rt,imm); } if(strcmp(opcode, "input") == 0){ if(sscanf(op_data, fg, tmp, &rd) == 2) return input(0,0,rd,0); } if(strcmp(opcode, "inputw") == 0){ if(sscanf(op_data, fg, tmp, &rd) == 2) return inputw(0,0,rd,0); } if(strcmp(opcode, "inputf") == 0){ if(sscanf(op_data, ff, tmp, &rd) == 2) return inputf(0,0,rd,0); } if(strcmp(opcode, "output") == 0){ if(sscanf(op_data, fg, tmp, &rs) == 2) return output(rs,0,0,0); } if(strcmp(opcode, "outputw") == 0){ if(sscanf(op_data, fg, tmp, &rs) == 2) return outputw(rs,0,0,0); } if(strcmp(opcode, "outputf") == 0){ if(sscanf(op_data, ff, tmp, &rs) == 2) return outputf(rs,0,0,0); } if(strcmp(opcode, "and") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return _and(rs,rt,rd,0); } if(strcmp(opcode, "or") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return _or(rs,rt,rd,0); } if(strcmp(opcode, "sll") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return sll(rs,rt,rd,0); } if(strcmp(opcode, "srl") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return srl(rs,rt,rd,0); } if(strcmp(opcode, "slli") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return slli(rs,rt,imm); } if(strcmp(opcode, "srli") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return srli(rs,rt,imm); } if(strcmp(opcode, "b") == 0){ if(sscanf(op_data, fg, tmp, &rs) == 2) return b(rs,0,0,0); } if(strcmp(opcode, "jmp") == 0){ if(sscanf(op_data, fl, tmp, lname) == 2) { strcpy(label_name[label_cnt],lname); return jmp(label_cnt++); } } if(strcmp(opcode, "jeq") == 0){ if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return jeq(rs,rt,label_cnt++); } } if(strcmp(opcode, "jne") == 0){ if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return jne(rs,rt,label_cnt++); } } if(strcmp(opcode, "jlt") == 0){ if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return jlt(rs,rt,label_cnt++); } } if(strcmp(opcode, "jle") == 0){ if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return jle(rs,rt,label_cnt++); } } if(strcmp(opcode, "call") == 0){ if(sscanf(op_data, fl, tmp, lname) == 2) { strcpy(label_name[label_cnt],lname); return call(label_cnt++); } } if(strcmp(opcode, "callR") == 0){ if(sscanf(op_data, fg, tmp, &rs) == 2) return callr(rs,0,0,0); } if(strcmp(opcode, "return") == 0){ return _return(0); } if(strcmp(opcode, "ld") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return ld(rs,rt,rd,0); } if(strcmp(opcode, "ldi") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return ldi(rs,rt,imm); } if(strcmp(opcode, "ldlr") == 0){ if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3) return ldlr(rs,0,imm); } if(strcmp(opcode, "fld") == 0){ if(sscanf(op_data, ffgg, tmp, &rd, &rs,&rt) == 4) return fld(rs,rt,rd,0); } if(strcmp(opcode, "st") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return st(rs,rt,rd,0); } if(strcmp(opcode, "sti") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return sti(rs,rt,imm); } if(strcmp(opcode, "stlr") == 0){ if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3) return stlr(rs,0,imm); } if(strcmp(opcode, "fst") == 0){ if(sscanf(op_data, ffgg, tmp, &rd, &rs,&rt) == 4) return fst(rs,rt,rd,0); } if(strcmp(opcode, "fadd") == 0){ if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4) return fadd(rs,rt,rd,0); } if(strcmp(opcode, "fsub") == 0){ if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4) return fsub(rs,rt,rd,0); } if(strcmp(opcode, "fmul") == 0){ if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4) return fmul(rs,rt,rd,0); } if(strcmp(opcode, "fdiv") == 0){ if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4) return fdiv(rs,rt,rd,0); } if(strcmp(opcode, "fsqrt") == 0){ if(sscanf(op_data, fff, tmp, &rd, &rs) == 3) return fsqrt(rs,0,rd,0); } if(strcmp(opcode, "fabs") == 0){ if(sscanf(op_data, fff, tmp, &rd, &rs) == 3) return _fabs(rs,0,rd,0); } if(strcmp(opcode, "fmov") == 0){ if(sscanf(op_data, fff, tmp, &rd, &rs) == 3) return fmov(rs,0,rd,0); } if(strcmp(opcode, "fneg") == 0){ if(sscanf(op_data, fff, tmp, &rd, &rs) == 3) return fneg(rs,0,rd,0); } if(strcmp(opcode, "fldi") == 0){ if(sscanf(op_data, ffgi, tmp, &rt, &rs, &imm) == 4) return fldi(rs,rt,imm); } if(strcmp(opcode, "fsti") == 0){ if(sscanf(op_data, ffgi, tmp, &rt, &rs, &imm) == 4) return fsti(rs,rt,imm); } if(strcmp(opcode, "fjeq") == 0){ if(sscanf(op_data, fffl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return fjeq(rs,rt,label_cnt++); } } if(strcmp(opcode, "fjlt") == 0){ if(sscanf(op_data, fffl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return fjlt(rs,rt,label_cnt++); } } if(strcmp(opcode, "halt") == 0){ return halt(0,0,0,0); } if(strcmp(opcode, "setL") == 0){ if(sscanf(op_data, fgl, tmp, &rd, lname) == 3) { strcpy(label_name[label_cnt],lname); return setl(0,rd,label_cnt++); } } if(strcmp(opcode, "padd") == 0){ if(sscanf(op_data, fgi, tmp, &rt, &imm) == 3) { return padd(0,rt,imm); } } if(strcmp(opcode, "link") == 0){ if(sscanf(op_data, fi, tmp, &imm) == 2) { return link(0,0,imm); } } if(strcmp(opcode, "movlr") == 0){ return movlr(0,0,0,0); } if(strcmp(opcode, "btmplr") == 0){ return btmplr(0,0,0,0); } /* if(strcmp(opcode, "padd") == 0){ if(sscanf(op_data, fgg, tmp, &rd, &rt) == 3) { return padd(0,rt,d,0); } } */ return -1; }