static boolean nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, const struct tgsi_full_instruction *finst) { struct nv30_sreg src[3], dst, tmp; struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); int mask; int ai = -1, ci = -1; int i; if (finst->Instruction.Opcode == TGSI_OPCODE_END) return TRUE; vpc->temp_temp_count = 0; for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; fsrc = &finst->FullSrcRegisters[i]; if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; fsrc = &finst->FullSrcRegisters[i]; switch (fsrc->SrcRegister.File) { case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->SrcRegister.Index) { ai = fsrc->SrcRegister.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); arith(vpc, 0, OP_MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; /*XXX: index comparison is broken now that consts come from * two different register files. 
*/ case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: if (ci == -1 || ci == fsrc->SrcRegister.Index) { ci = fsrc->SrcRegister.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); arith(vpc, 0, OP_MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; case TGSI_FILE_TEMPORARY: /* handled above */ break; default: NOUVEAU_ERR("bad src file\n"); return FALSE; } } dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); break; case TGSI_OPCODE_ADD: arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); break; case TGSI_OPCODE_ARL: arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DP3: arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DP4: arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DPH: arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DST: arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_EX2: arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_EXP: arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_FLR: arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); break; case TGSI_OPCODE_FRC: arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); break; case TGSI_OPCODE_LG2: arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LIT: arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LOG: arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_MAD: arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAX: arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MIN: arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MOV: 
arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); break; case TGSI_OPCODE_MUL: arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: tmp = temp(vpc); arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, swz(src[0], X, X, X, X)); arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none); arith(vpc, 1, OP_EX2, dst, mask, none, none, swz(tmp, X, X, X, X)); break; case TGSI_OPCODE_RCP: arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_RET: break; case TGSI_OPCODE_RSQ: arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_SGE: arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SGT: arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SLT: arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SUB: arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); break; case TGSI_OPCODE_XPD: tmp = temp(vpc); arith(vpc, 0, OP_MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; default: NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); return FALSE; } return TRUE; }
/**
 * Lower every instruction supplied by the parser into hardware vertex
 * program instructions.
 *
 * While emitting, the value of m_nInstructions at the start of each parsed
 * instruction is recorded so that branch/call targets, which are expressed
 * as parsed-instruction indices (insn->dst.index), can afterwards be
 * rewritten to emitted-instruction positions and appended to
 * m_lBranchRelocation.
 *
 * @param pParser  parser that provides the instruction array and its length
 */
void CCompiler::Compile(CParser *pParser)
{
	typedef std::list<struct nvfx_relocation> RelocList;

	struct nvfx_src scratch;
	struct nvfx_relocation pending;
	std::vector<u32> hwPosition;	// parsed index -> m_nInstructions at that point
	RelocList branchFixups;		// branches/calls awaiting target translation
	const int parsedCount = pParser->GetInstructionCount();
	struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE,0));
	struct nvfx_insn synth;
	struct nvfx_insn *parsed = pParser->GetInstructions();

	Prepare(pParser);

	for(int idx = 0; idx < parsedCount; ++idx) {
		struct nvfx_insn *cur = parsed + idx;

		hwPosition.push_back(m_nInstructions);

		switch(cur->op) {
			// --- instructions built from scratch ---------------------
			case OPCODE_NOP:
				synth = arith(0,none.reg,0,none,none,none);
				emit_insn(gen_op(NOP,VEC),&synth);
				break;
			case OPCODE_ABS:
				// ABS is a MOV whose source carries the abs modifier.
				synth = arith_ctor(cur,cur->dst,abs(cur->src[0]),none,none);
				emit_insn(gen_op(MOV,VEC),&synth);
				break;
			case OPCODE_SUB:
				// SUB is an ADD with the third source negated.
				synth = arith_ctor(cur,cur->dst,cur->src[0],none,neg(cur->src[2]));
				emit_insn(gen_op(ADD,VEC),&synth);
				break;
			case OPCODE_POW:
				// pow(a, b) expanded as EX2(b * LG2(a)) through a temp.
				scratch = nvfx_src(temp());

				synth = arith(0, scratch.reg, NVFX_VP_MASK_X, none, none, cur->src[0]);
				emit_insn(gen_op(LG2,SCA),&synth);

				synth = arith(0, scratch.reg, NVFX_VP_MASK_X, swz(scratch, X, X, X, X), cur->src[1], none);
				emit_insn(gen_op(MUL,VEC),&synth);

				synth = arith_ctor(cur, cur->dst, none, none, swz(scratch, X, X, X, X));
				emit_insn(gen_op(EX2,SCA),&synth);
				break;

			// --- address register ops: nothing is emitted ------------
			case OPCODE_ARA:
			case OPCODE_ARL:
			case OPCODE_ARR:
				break;

			// --- flow control: remember the site for later fix-up ----
			case OPCODE_BRA:
				pending.location = m_nInstructions;
				pending.target = cur->dst.index;
				branchFixups.push_back(pending);

				synth = arith(0,none.reg,0,none,none,none);
				emit_insn(gen_op(BRA,SCA),&synth);
				break;
			case OPCODE_CAL:
				pending.location = m_nInstructions;
				pending.target = cur->dst.index;
				branchFixups.push_back(pending);

				synth = arith(0,none.reg,0,none,none,none);
				emit_insn(gen_op(CAL,SCA),&synth);
				break;

			// --- opcodes tagged SCA, emitted as parsed ---------------
			case OPCODE_COS: emit_insn(gen_op(COS,SCA),cur); break;
			case OPCODE_EX2: emit_insn(gen_op(EX2,SCA),cur); break;
			case OPCODE_EXP: emit_insn(gen_op(EXP,SCA),cur); break;
			case OPCODE_LG2: emit_insn(gen_op(LG2,SCA),cur); break;
			case OPCODE_LIT: emit_insn(gen_op(LIT,SCA),cur); break;
			case OPCODE_LOG: emit_insn(gen_op(LOG,SCA),cur); break;
			case OPCODE_RCC: emit_insn(gen_op(RCC,SCA),cur); break;
			case OPCODE_RCP: emit_insn(gen_op(RCP,SCA),cur); break;
			case OPCODE_RSQ: emit_insn(gen_op(RSQ,SCA),cur); break;
			case OPCODE_SIN: emit_insn(gen_op(SIN,SCA),cur); break;

			// --- opcodes tagged VEC, emitted as parsed ---------------
			case OPCODE_ADD: emit_insn(gen_op(ADD,VEC),cur); break;
			case OPCODE_DP3: emit_insn(gen_op(DP3,VEC),cur); break;
			case OPCODE_DP4: emit_insn(gen_op(DP4,VEC),cur); break;
			case OPCODE_DPH: emit_insn(gen_op(DPH,VEC),cur); break;
			case OPCODE_DST: emit_insn(gen_op(DST,VEC),cur); break;
			case OPCODE_FLR: emit_insn(gen_op(FLR,VEC),cur); break;
			case OPCODE_FRC: emit_insn(gen_op(FRC,VEC),cur); break;
			case OPCODE_MAD: emit_insn(gen_op(MAD,VEC),cur); break;
			case OPCODE_MAX: emit_insn(gen_op(MAX,VEC),cur); break;
			case OPCODE_MIN: emit_insn(gen_op(MIN,VEC),cur); break;
			case OPCODE_MOV: emit_insn(gen_op(MOV,VEC),cur); break;
			case OPCODE_MUL: emit_insn(gen_op(MUL,VEC),cur); break;
			case OPCODE_SEQ: emit_insn(gen_op(SEQ,VEC),cur); break;
			case OPCODE_SFL: emit_insn(gen_op(SFL,VEC),cur); break;
			case OPCODE_SGE: emit_insn(gen_op(SGE,VEC),cur); break;
			case OPCODE_SGT: emit_insn(gen_op(SGT,VEC),cur); break;
			case OPCODE_SLE: emit_insn(gen_op(SLE,VEC),cur); break;
			case OPCODE_SLT: emit_insn(gen_op(SLT,VEC),cur); break;
			case OPCODE_SNE: emit_insn(gen_op(SNE,VEC),cur); break;
			case OPCODE_SSG: emit_insn(gen_op(SSG,VEC),cur); break;
			case OPCODE_STR: emit_insn(gen_op(STR,VEC),cur); break;

			// --- program end -----------------------------------------
			case OPCODE_END:
				// If nothing has been emitted yet, emit a NOP first so
				// there is an instruction to carry the LAST flag.
				if(!m_nInstructions) {
					synth = arith(0,none.reg,0,none,none,none);
					emit_insn(gen_op(NOP,VEC),&synth);
				}
				m_pInstructions[m_nCurInstruction].data[3] |= NVFX_VP_INST_LAST;
				break;
		}

		release_temps();
	}

	// Rewrite each recorded branch target from a parsed-instruction index
	// to the emitted-instruction position captured in hwPosition.
	RelocList::iterator fix = branchFixups.begin();
	while(fix != branchFixups.end()) {
		struct nvfx_relocation hw_reloc;

		hw_reloc.location = fix->location;
		hw_reloc.target = hwPosition[fix->target];
		m_lBranchRelocation.push_back(hw_reloc);

		++fix;
	}
}
/**
 * Lower every instruction supplied by the parser into hardware fragment
 * program instructions.
 *
 * Most opcodes are forwarded to emit_insn() unchanged; LRP, POW and RSQ are
 * expanded into short sequences that go through a temporary register, and
 * BRK / BGNREP / ENDREP are delegated to their dedicated helpers.
 * release_temps() is called after each parsed instruction.
 *
 * @param pParser  parser that provides the instruction array and its length
 */
void CCompilerFP::Compile(CParser *pParser)
{
	const int parsedCount = pParser->GetInstructionCount();
	struct nvfx_insn synth;
	struct nvfx_insn *parsed = pParser->GetInstructions();
	struct nvfx_src scratch;
	struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE,0));

	Prepare(pParser);

	for(int idx = 0; idx < parsedCount; ++idx) {
		struct nvfx_insn *cur = parsed + idx;

		switch(cur->op) {
			// --- expanded opcodes ------------------------------------
			case OPCODE_LRP:
				// scratch = -src0 * src2 + src2
				// dst     =  src0 * src1 + scratch
				scratch = nvfx_src(temp());

				synth = arith(0,scratch.reg,cur->mask,neg(cur->src[0]),cur->src[2],cur->src[2]);
				emit_insn(NVFX_FP_OP_OPCODE_MAD,&synth);

				synth = arith(cur->sat,cur->dst,cur->mask,cur->src[0],cur->src[1],scratch);
				emit_insn(NVFX_FP_OP_OPCODE_MAD,&synth);
				break;
			case OPCODE_POW:
				// pow(a, b) expanded as EX2(b * LG2(a)) through a temp.
				scratch = nvfx_src(temp());

				synth = arith(0,scratch.reg, NVFX_FP_MASK_X, cur->src[0], none, none);
				emit_insn(NVFX_FP_OP_OPCODE_LG2,&synth);

				synth = arith(0,scratch.reg, NVFX_FP_MASK_X, swz(scratch, X, X, X, X),cur->src[1], none);
				emit_insn(NVFX_FP_OP_OPCODE_MUL,&synth);

				synth = arith_ctor(cur,cur->dst,swz(scratch, X, X, X, X), none, none);
				emit_insn(NVFX_FP_OP_OPCODE_EX2,&synth);
				break;
			case OPCODE_RSQ:
				// LG2 of |src0| with the INV_2X destination scale,
				// followed by EX2 of the negated result.
				scratch = nvfx_src(temp());

				synth = arith(0,scratch.reg,NVFX_FP_MASK_X,abs(cur->src[0]),none,none);
				synth.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
				emit_insn(NVFX_FP_OP_OPCODE_LG2,&synth);

				synth = arith_ctor(cur,cur->dst,neg(swz(scratch,X,X,X,X)),none,none);
				emit_insn(NVFX_FP_OP_OPCODE_EX2,&synth);
				break;

			// --- flow control ----------------------------------------
			case OPCODE_BRK:    emit_brk(cur); break;
			case OPCODE_BGNREP: emit_rep(cur); break;
			case OPCODE_ENDREP: fixup_rep();   break;

			// --- arithmetic, emitted as parsed -----------------------
			case OPCODE_ADD: emit_insn(NVFX_FP_OP_OPCODE_ADD,cur); break;
			case OPCODE_COS: emit_insn(NVFX_FP_OP_OPCODE_COS,cur); break;
			case OPCODE_DP3: emit_insn(NVFX_FP_OP_OPCODE_DP3,cur); break;
			case OPCODE_DP4: emit_insn(NVFX_FP_OP_OPCODE_DP4,cur); break;
			case OPCODE_EX2: emit_insn(NVFX_FP_OP_OPCODE_EX2,cur); break;
			case OPCODE_LG2: emit_insn(NVFX_FP_OP_OPCODE_LG2,cur); break;
			case OPCODE_MAD: emit_insn(NVFX_FP_OP_OPCODE_MAD,cur); break;
			case OPCODE_MAX: emit_insn(NVFX_FP_OP_OPCODE_MAX,cur); break;
			case OPCODE_MIN: emit_insn(NVFX_FP_OP_OPCODE_MIN,cur); break;
			case OPCODE_MOV: emit_insn(NVFX_FP_OP_OPCODE_MOV,cur); break;
			case OPCODE_MUL: emit_insn(NVFX_FP_OP_OPCODE_MUL,cur); break;
			case OPCODE_RCP: emit_insn(NVFX_FP_OP_OPCODE_RCP,cur); break;
			case OPCODE_SIN: emit_insn(NVFX_FP_OP_OPCODE_SIN,cur); break;

			// --- set-on-condition, emitted as parsed -----------------
			case OPCODE_SEQ: emit_insn(NVFX_FP_OP_OPCODE_SEQ,cur); break;
			case OPCODE_SFL: emit_insn(NVFX_FP_OP_OPCODE_SFL,cur); break;
			case OPCODE_SGE: emit_insn(NVFX_FP_OP_OPCODE_SGE,cur); break;
			case OPCODE_SGT: emit_insn(NVFX_FP_OP_OPCODE_SGT,cur); break;
			case OPCODE_SLE: emit_insn(NVFX_FP_OP_OPCODE_SLE,cur); break;
			case OPCODE_SLT: emit_insn(NVFX_FP_OP_OPCODE_SLT,cur); break;
			case OPCODE_SNE: emit_insn(NVFX_FP_OP_OPCODE_SNE,cur); break;

			// --- texture lookups, emitted as parsed ------------------
			case OPCODE_TEX: emit_insn(NVFX_FP_OP_OPCODE_TEX,cur); break;
			case OPCODE_TXB: emit_insn(NVFX_FP_OP_OPCODE_TXB,cur); break;
			case OPCODE_TXL: emit_insn(NVFX_FP_OP_OPCODE_TXL_NV40,cur); break;
			case OPCODE_TXP: emit_insn(NVFX_FP_OP_OPCODE_TXP,cur); break;

			// --- program end -----------------------------------------
			case OPCODE_END:
				if(m_nInstructions == 0) {
					// Nothing was emitted: append one instruction whose
					// first word is 0x00000001 and whose remaining
					// words are zero.
					m_nCurInstruction = m_nInstructions;
					grow_insns(1);
					m_pInstructions[m_nCurInstruction].data[0] = 0x00000001;
					m_pInstructions[m_nCurInstruction].data[1] = 0x00000000;
					m_pInstructions[m_nCurInstruction].data[2] = 0x00000000;
					m_pInstructions[m_nCurInstruction].data[3] = 0x00000000;
				} else {
					// Flag the most recently emitted instruction as the
					// end of the program.
					m_pInstructions[m_nCurInstruction].data[0] |= NVFX_FP_OP_PROGRAM_END;
				}
				break;
		}

		release_temps();
	}
}