/*
 * Encode one source operand of the most recently emitted nv30
 * vertex-program instruction into the 4-word opcode at 'hw'.
 *
 * vpc: compiler context (provides the program being built)
 * hw:  the 4 x 32-bit instruction words being assembled
 * pos: operand slot, 0..2
 * src: operand descriptor (register type/index, modifiers)
 *
 * The operand is first packed into 'sr' and later scattered into the
 * slot-specific bit positions of hw[].
 */
static void emit_src(struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_src src)
{
	struct nv30_vertprog *vp = vpc->vp;
	uint32_t sr = 0;                 /* packed source descriptor */
	struct nvfx_relocation reloc;

	switch (src.reg.type) {
	case NVFXSR_TEMP:
		sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT));
		sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
		break;
	case NVFXSR_INPUT:
		sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << NVFX_VP(SRC_REG_TYPE_SHIFT));
		/* record that this input attribute is read by the program */
		vp->ir |= (1 << src.reg.index);
		hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT));
		break;
	case NVFXSR_CONST:
		sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << NVFX_VP(SRC_REG_TYPE_SHIFT));
		/* const index in [-256, 256): queue a relocation against the
		 * just-emitted instruction (vp->nr_insns - 1) so the final
		 * constant slot can be patched in later */
		if (src.reg.index < 256 && src.reg.index >= -256) {
			reloc.location = vp->nr_insns - 1;
			reloc.target = src.reg.index;
			util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc);
		} else {
/*
 * Append a new instruction slot to the vertex program and encode the
 * fields of 'insn' that are common to nv30 and nv4x, then branch into
 * the per-family opcode/writemask encodings.
 *
 * insn.op packs the target unit and opcode: bit 7 selects the scalar
 * unit (slot 1) vs the vector unit (slot 0); the low 7 bits are the
 * unit-local opcode.
 */
static void nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn)
{
	struct nvfx_context* nvfx = vpc->nvfx;
	struct nvfx_vertex_program *vp = vpc->vp;
	unsigned slot = insn.op >> 7;    /* 0 = vector unit, 1 = scalar unit */
	unsigned op = insn.op & 0x7f;    /* unit-local opcode */
	uint32_t *hw;

	/* grow the instruction array by one and zero the new slot.
	 * NOTE(review): the realloc result is unchecked — on failure the
	 * old array is lost and the memset below dereferences NULL. */
	vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
	vpc->vpi = &vp->insns[vp->nr_insns - 1];
	memset(vpc->vpi, 0, sizeof(*vpc->vpi));
	hw = vpc->vpi->data;

	/* condition-code test and per-component condition swizzle */
	hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT));
	hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
	          (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
	          (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
	          (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
	if(insn.cc_update)
		hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE);

	if(insn.sat) {
		/* result saturation is an nv4x-only feature */
		assert(nvfx->use_nv4x);
		if(nvfx->use_nv4x)
			hw[0] |= NV40_VP_INST_SATURATE;
	}

	if(!nvfx->is_nv4x) {
		/* nv30 encoding: the scalar opcode is split across words 0/1 */
		if(slot == 0)
			hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
		else {
			hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT);
			hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT);
		}
		// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
		// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));

		/* the writemask field differs for output vs temp destinations
		 * and for the scalar vs vector unit */
		if (insn.dst.type == NVFXSR_OUTPUT) {
			if (slot)
				hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
			else
				hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
		} else {
			if (slot)
				hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
			else
				hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
		}
	} else {
		if (slot == 0) {
/*
 * Append one instruction to the program buffer and encode it (nv40
 * encoding): destination, the three source operands, condition-code
 * state, saturation, and the unit-specific opcode/writemask fields.
 *
 * opcode packs the target unit and operation: bit 7 selects the scalar
 * unit (slot 1) vs the vector unit (slot 0); the low 7 bits are the
 * unit-local opcode.
 *
 * On allocation failure the program is left unchanged and the
 * instruction is not emitted.
 */
void CCompiler::emit_insn(u8 opcode,struct nvfx_insn *insn)
{
	u32 *hw;
	u32 slot = opcode>>7;     /* 0 = vector unit, 1 = scalar unit */
	u32 op = opcode&0x7f;     /* unit-local opcode */

	/* Grow the instruction buffer through a temporary so the old
	 * buffer is neither leaked nor dereferenced as NULL if realloc
	 * fails (the original assigned the result directly). State is
	 * only committed once the allocation succeeded. */
	struct vertex_program_exec *grown =
		(struct vertex_program_exec*)realloc(m_pInstructions,(m_nInstructions + 1)*sizeof(struct vertex_program_exec));
	if(!grown) return;        /* out of memory: drop the instruction, keep state consistent */

	m_pInstructions = grown;
	m_nCurInstruction = m_nInstructions++;
	memset(&m_pInstructions[m_nCurInstruction],0,sizeof(struct vertex_program_exec));

	hw = m_pInstructions[m_nCurInstruction].data;

	emit_dst(hw,slot,insn);
	emit_src(hw,0,&insn->src[0]);
	emit_src(hw,1,&insn->src[1]);
	emit_src(hw,2,&insn->src[2]);

	/* condition-code: condition, test, register select and swizzle */
	hw[0] |= (insn->cc_cond << NVFX_VP(INST_COND_SHIFT));
	hw[0] |= (insn->cc_test << NVFX_VP(INST_COND_TEST_SHIFT));
	hw[0] |= (insn->cc_test_reg << NVFX_VP(INST_COND_REG_SELECT_SHIFT));
	hw[0] |= ((insn->cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
	          (insn->cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
	          (insn->cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
	          (insn->cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
	if(insn->cc_update)
		hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE);
	if(insn->sat)
		hw[0] |= NV40_VP_INST_SATURATE;

	if (slot == 0) {
		/* vector-unit op; the unused scalar dest-temp field is set to
		 * all ones (presumably marking it inactive — matches the
		 * NVFXSR_NONE handling in emit_dst) */
		hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
		hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
		hw[3] |= (insn->mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
	} else {
		/* scalar-unit op; same all-ones fill for the vector dest-temp */
		hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
		hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
		hw[3] |= (insn->mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
	}
}
/*
 * Encode source operand 'pos' (0..2) of the current instruction into
 * the 4-word opcode at 'hw'. The operand is assembled into a packed
 * descriptor first, then scattered into the slot-specific bit ranges.
 */
void CCompiler::emit_src(u32 *hw, u8 pos, struct nvfx_src *src)
{
	struct nvfx_relocation reloc;
	u32 bits = 0;   /* packed source descriptor */

	switch(src->reg.type) {
	case NVFXSR_TEMP:
		bits = (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT))
		     | (src->reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
		break;
	case NVFXSR_INPUT:
		bits = NVFX_VP(SRC_REG_TYPE_INPUT) << NVFX_VP(SRC_REG_TYPE_SHIFT);
		m_nInputMask |= 1 << src->reg.index;          /* mark attribute as read */
		hw[1] |= src->reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT);
		break;
	case NVFXSR_CONST:
		/* constant slots are resolved later: remember which
		 * instruction needs patching and with which target index */
		bits = NVFX_VP(SRC_REG_TYPE_CONST) << NVFX_VP(SRC_REG_TYPE_SHIFT);
		reloc.location = m_nCurInstruction;
		reloc.target = src->reg.index;
		m_lConstRelocation.push_back(reloc);
		break;
	case NVFXSR_NONE:
		/* unused operand still carries an input-type tag */
		bits = NVFX_VP(SRC_REG_TYPE_INPUT) << NVFX_VP(SRC_REG_TYPE_SHIFT);
		break;
	}

	if(src->negate)
		bits |= NVFX_VP(SRC_NEGATE);
	/* absolute-value flags live in hw[0], one bit per operand slot */
	if(src->abs)
		hw[0] |= 1 << (21 + pos);

	bits |= (src->swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT))
	      | (src->swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT))
	      | (src->swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT))
	      | (src->swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT));

	if(src->indirect) {
		/* address-register-relative access */
		if(src->reg.type == NVFXSR_CONST)
			hw[3] |= NVFX_VP(INST_INDEX_CONST);
		else if(src->reg.type == NVFXSR_INPUT)
			hw[0] |= NVFX_VP(INST_INDEX_INPUT);
		if(src->indirect_reg)
			hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1);
		hw[0] |= src->indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT);
	}

	/* scatter the descriptor: src0 and src2 straddle a word boundary,
	 * src1 fits entirely in word 2 */
	if(pos == 0) {
		hw[1] |= (((bits & NVFX_VP(SRC0_HIGH_MASK)) >> NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT));
		hw[2] |= ((bits & NVFX_VP(SRC0_LOW_MASK)) << NVFX_VP(INST_SRC0L_SHIFT));
	} else if(pos == 1) {
		hw[2] |= (bits << NVFX_VP(INST_SRC1_SHIFT));
	} else if(pos == 2) {
		hw[2] |= (((bits & NVFX_VP(SRC2_HIGH_MASK)) >> NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT));
		hw[3] |= ((bits & NVFX_VP(SRC2_LOW_MASK)) << NVFX_VP(INST_SRC2L_SHIFT));
	}
}
/*
 * Encode the destination of 'insn' into the 4-word opcode at 'hw'
 * (nv40 encoding). For outputs, nv30-style clip-plane destinations are
 * remapped onto components of FOGC/PSZ (overwriting insn->mask), and
 * the written outputs are recorded in m_nOutputMask.
 *
 * slot: 0 = vector unit, 1 = scalar unit — selects which dest-temp
 * field in the instruction words is used.
 */
void CCompiler::emit_dst(u32 *hw,u8 slot,struct nvfx_insn *insn)
{
	struct nvfx_reg *dst = &insn->dst;

	switch(dst->type) {
	case NVFXSR_NONE:
		/* no destination: fill the dest fields with all ones */
		hw[3] |= NV40_VP_INST_DEST_MASK;
		if(slot==0)
			hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
		else
			hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
		break;
	case NVFXSR_TEMP:
		/* temp register: output field all ones, temp index in the
		 * unit-specific dest-temp field */
		hw[3] |= NV40_VP_INST_DEST_MASK;
		if (slot == 0)
			hw[0] |= (dst->index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
		else
			hw[3] |= (dst->index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
		break;
	case NVFXSR_OUTPUT:
		switch (dst->index) {
		/* clip planes 0-2 map to FOGC.y/z/w, 3-5 to PSZ.y/z/w;
		 * the writemask is forced to the single target component */
		case NV30_VP_INST_DEST_CLP(0):
			dst->index = NVFX_VP(INST_DEST_FOGC);
			insn->mask = NVFX_VP_MASK_Y;
			m_nOutputMask |= (1 << 6);
			break;
		case NV30_VP_INST_DEST_CLP(1):
			dst->index = NVFX_VP(INST_DEST_FOGC);
			insn->mask = NVFX_VP_MASK_Z;
			m_nOutputMask |= (1 << 7);
			break;
		case NV30_VP_INST_DEST_CLP(2):
			dst->index = NVFX_VP(INST_DEST_FOGC);
			insn->mask = NVFX_VP_MASK_W;
			m_nOutputMask |= (1 << 8);
			break;
		case NV30_VP_INST_DEST_CLP(3):
			dst->index = NVFX_VP(INST_DEST_PSZ);
			insn->mask = NVFX_VP_MASK_Y;
			m_nOutputMask |= (1 << 9);
			break;
		case NV30_VP_INST_DEST_CLP(4):
			dst->index = NVFX_VP(INST_DEST_PSZ);
			insn->mask = NVFX_VP_MASK_Z;
			m_nOutputMask |= (1 << 10);
			break;
		case NV30_VP_INST_DEST_CLP(5):
			dst->index = NVFX_VP(INST_DEST_PSZ);
			insn->mask = NVFX_VP_MASK_W;
			m_nOutputMask |= (1 << 11);
			break;
		/* fixed-function outputs: record the written output bit */
		case NV40_VP_INST_DEST_COL0 : m_nOutputMask |= (1 << 0); break;
		case NV40_VP_INST_DEST_COL1 : m_nOutputMask |= (1 << 1); break;
		case NV40_VP_INST_DEST_BFC0 : m_nOutputMask |= (1 << 2); break;
		case NV40_VP_INST_DEST_BFC1 : m_nOutputMask |= (1 << 3); break;
		case NV40_VP_INST_DEST_FOGC : m_nOutputMask |= (1 << 4); break;
		case NV40_VP_INST_DEST_PSZ : m_nOutputMask |= (1 << 5); break;
		default:
			/* texture coordinates TC0..TC7 occupy bits 14..21 */
			if(dst->index>=NV40_VP_INST_DEST_TC(0) && dst->index<=NV40_VP_INST_DEST_TC(7))
				m_nOutputMask |= (1<<(dst->index - NV40_VP_INST_DEST_TC0 + 14));
			break;
		}
		hw[3] |= (dst->index << NV40_VP_INST_DEST_SHIFT);
		if (slot == 0) {
			hw[0] |= NV40_VP_INST_VEC_RESULT;
			hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
		}
		else {
			hw[3] |= NV40_VP_INST_SCA_RESULT;
			hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
		}
		break;
	}
}
/*
 * Encode the destination register 'dst' into the 4-word opcode at
 * 'hw', handling both the nv30 and nv4x instruction encodings.
 *
 * slot: 0 = vector unit, 1 = scalar unit (only meaningful on nv4x,
 * where the two units have separate dest-temp fields).
 * For outputs on nv4x, nv30-style clip-plane indices are remapped to
 * FOGC/PSZ and written fixed-function outputs are recorded in vp->or.
 */
static void emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_reg dst)
{
	struct nvfx_vertex_program *vp = vpc->vp;

	switch (dst.type) {
	case NVFXSR_NONE:
		/* no destination: fill the dest-temp field(s) with all ones */
		if(!nvfx->is_nv4x)
			hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK;
		else {
			hw[3] |= NV40_VP_INST_DEST_MASK;
			if (slot == 0)
				hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
			else
				hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
		}
		break;
	case NVFXSR_TEMP:
		/* temp destination: temp index into the (unit-specific on
		 * nv4x) dest-temp field */
		if(!nvfx->is_nv4x)
			hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
		else {
			hw[3] |= NV40_VP_INST_DEST_MASK;
			if (slot == 0)
				hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
			else
				hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
		}
		break;
	case NVFXSR_OUTPUT:
		/* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
		if(nvfx->is_nv4x) {
			/* clip planes 0-2 map onto FOGC, 3-5 onto PSZ; the
			 * remaining cases track written outputs in vp->or */
			switch (dst.index) {
			case NV30_VP_INST_DEST_CLP(0):
				dst.index = NVFX_VP(INST_DEST_FOGC);
				break;
			case NV30_VP_INST_DEST_CLP(1):
				dst.index = NVFX_VP(INST_DEST_FOGC);
				break;
			case NV30_VP_INST_DEST_CLP(2):
				dst.index = NVFX_VP(INST_DEST_FOGC);
				break;
			case NV30_VP_INST_DEST_CLP(3):
				dst.index = NVFX_VP(INST_DEST_PSZ);
				break;
			case NV30_VP_INST_DEST_CLP(4):
				dst.index = NVFX_VP(INST_DEST_PSZ);
				break;
			case NV30_VP_INST_DEST_CLP(5):
				dst.index = NVFX_VP(INST_DEST_PSZ);
				break;
			case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
			case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
			case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
			case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
			case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
			case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
			}
		}

		if(!nvfx->is_nv4x) {
			hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
			hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK;

			/*XXX: no way this is entirely correct, someone needs to
			 * figure out what exactly it is.
			 */
			hw[3] |= 0x800;
		} else {
			hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
			if (slot == 0) {
				hw[0] |= NV40_VP_INST_VEC_RESULT;
				hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
			} else {
				hw[3] |= NV40_VP_INST_SCA_RESULT;
				hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
			}
		}
		break;
	default:
		assert(0);
	}
}
/*
 * Encode source operand 'pos' (0..2) of the newest instruction into
 * the 4-word opcode at 'hw'. The operand is assembled into a packed
 * descriptor first, then scattered into the slot-specific bit ranges.
 */
static void emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_src src)
{
	struct nvfx_vertex_program *vp = vpc->vp;
	struct nvfx_relocation reloc;
	uint32_t bits = 0;   /* packed source descriptor */

	switch (src.reg.type) {
	case NVFXSR_TEMP:
		bits = (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT))
		     | (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
		break;
	case NVFXSR_INPUT:
		bits = NVFX_VP(SRC_REG_TYPE_INPUT) << NVFX_VP(SRC_REG_TYPE_SHIFT);
		vp->ir |= 1 << src.reg.index;          /* mark attribute as read */
		hw[1] |= src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT);
		break;
	case NVFXSR_CONST:
		/* constant slots are resolved later: queue a relocation for
		 * the just-emitted instruction */
		bits = NVFX_VP(SRC_REG_TYPE_CONST) << NVFX_VP(SRC_REG_TYPE_SHIFT);
		reloc.location = vp->nr_insns - 1;
		reloc.target = src.reg.index;
		util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc);
		break;
	case NVFXSR_NONE:
		/* unused operand still carries an input-type tag */
		bits = NVFX_VP(SRC_REG_TYPE_INPUT) << NVFX_VP(SRC_REG_TYPE_SHIFT);
		break;
	default:
		assert(0);
	}

	if (src.negate)
		bits |= NVFX_VP(SRC_NEGATE);
	/* absolute-value flags live in hw[0], one bit per operand slot */
	if (src.abs)
		hw[0] |= 1 << (21 + pos);

	bits |= (src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT))
	      | (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT))
	      | (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT))
	      | (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT));

	if(src.indirect) {
		/* address-register-relative access: only const and input
		 * sources may be indexed indirectly */
		if(src.reg.type == NVFXSR_CONST)
			hw[3] |= NVFX_VP(INST_INDEX_CONST);
		else if(src.reg.type == NVFXSR_INPUT)
			hw[0] |= NVFX_VP(INST_INDEX_INPUT);
		else
			assert(0);
		if(src.indirect_reg)
			hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1);
		hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT);
	}

	/* scatter the descriptor: src0 and src2 straddle a word boundary,
	 * src1 fits entirely in word 2 */
	if (pos == 0) {
		hw[1] |= ((bits & NVFX_VP(SRC0_HIGH_MASK)) >> NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT);
		hw[2] |= (bits & NVFX_VP(SRC0_LOW_MASK)) << NVFX_VP(INST_SRC0L_SHIFT);
	} else if (pos == 1) {
		hw[2] |= bits << NVFX_VP(INST_SRC1_SHIFT);
	} else if (pos == 2) {
		hw[2] |= ((bits & NVFX_VP(SRC2_HIGH_MASK)) >> NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT);
		hw[3] |= (bits & NVFX_VP(SRC2_LOW_MASK)) << NVFX_VP(INST_SRC2L_SHIFT);
	} else {
		assert(0);
	}
}