void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) { int max_reg = -1; struct rc_instruction * tmp; memset(s, 0, sizeof(*s)); for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; tmp = tmp->Next){ const struct rc_opcode_info * info; rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); if (tmp->Type == RC_INSTRUCTION_NORMAL) { if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) s->num_presub_ops++; info = rc_get_opcode_info(tmp->U.I.Opcode); } else { if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) s->num_presub_ops++; if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) s->num_presub_ops++; /* Assuming alpha will never be a flow control or * a tex instruction. */ if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) s->num_alpha_insts++; if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) s->num_rgb_insts++; info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); } if (info->IsFlowControl) s->num_fc_insts++; if (info->HasTexture) s->num_tex_insts++; s->num_insts++; } s->num_temp_regs = max_reg + 1; }
/**
 * Map a compiler opcode to the corresponding R500 alpha ALU opcode.
 *
 * Opcodes without a mapping report an error through error() and are
 * encoded as MAD, the same encoding that is used for NOP.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALPHA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALPHA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALPHA_OP_CND;
	case RC_OPCODE_COS:
		return R500_ALPHA_OP_COS;
	case RC_OPCODE_SIN:
		return R500_ALPHA_OP_SIN;
	case RC_OPCODE_DDX:
		return R500_ALPHA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALPHA_OP_MDV;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		return R500_ALPHA_OP_DP;
	case RC_OPCODE_EX2:
		return R500_ALPHA_OP_EX2;
	case RC_OPCODE_LG2:
		return R500_ALPHA_OP_LN2;
	case RC_OPCODE_FRC:
		return R500_ALPHA_OP_FRC;
	case RC_OPCODE_MAX:
		return R500_ALPHA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALPHA_OP_MIN;
	case RC_OPCODE_RCP:
		return R500_ALPHA_OP_RCP;
	case RC_OPCODE_RSQ:
		return R500_ALPHA_OP_RSQ;
	default:
		break;
	}

	/* Unknown opcode: report it, then fall back to MAD. */
	error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALPHA_OP_MAD;
}
/**
 * Invoke @p cb for every register channel read by a normal instruction.
 *
 * For each source operand, the set of referenced channels is derived from
 * the operand's swizzle (constant swizzles such as 0/1 are masked away by
 * RC_MASK_XYZW) and @p cb is called once per referenced channel index.
 * A source that uses relative addressing additionally reports a read of
 * the address register.
 *
 * Sources whose file is RC_FILE_NONE are skipped individually; they must
 * not hide the reads performed by the sources that follow them.
 */
static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
{
	struct rc_sub_instruction * inst = &fullinst->U.I;
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);

	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
		unsigned int refmask = 0;

		/* Skip only this operand: a later operand may still name a
		 * real register (e.g. "ADD tmp, none.1111, -input"). */
		if (inst->SrcReg[src].File == RC_FILE_NONE)
			continue;

		for(unsigned int chan = 0; chan < 4; ++chan)
			refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);

		refmask &= RC_MASK_XYZW;

		for(unsigned int chan = 0; chan < 4; ++chan) {
			if (GET_BIT(refmask, chan)) {
				cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan);
			}
		}

		/* NOTE(review): the last argument is a channel index in the
		 * calls above but a mask constant here - confirm that
		 * RC_MASK_X is the value the callbacks expect. */
		if (refmask && inst->SrcReg[src].RelAddr)
			cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
	}
}
/**
 * Call the callback function (cb) for each source used by the instruction.
 *
 * A source that reads the presubtract result is expanded: cb is called
 * once for every register feeding the presubtract operation instead of
 * once for the presubtract source itself.  Sources with file
 * RC_FILE_NONE are skipped.
 *
 * This function only works with normal instructions; any other
 * instruction type triggers an assertion and is otherwise ignored.
 */
void rc_for_all_reads_src(
	struct rc_instruction * inst,
	rc_read_src_fn cb,
	void * userdata)
{
	const struct rc_opcode_info * opcode;

	/* Check the type before touching inst->U.I: for a non-normal
	 * instruction that union member does not hold a valid opcode. */
	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		assert(0);
		return;
	}

	opcode = rc_get_opcode_info(inst->U.I.Opcode);

	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {

		if (inst->U.I.SrcReg[src].File == RC_FILE_NONE)
			continue;

		if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) {
			unsigned int i;
			unsigned int srcp_regs = rc_presubtract_src_reg_count(
						inst->U.I.PreSub.Opcode);
			for( i = 0; i < srcp_regs; i++) {
				cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]);
			}
		} else {
			cb(userdata, inst, &inst->U.I.SrcReg[src]);
		}
	}
}
/** * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. * Gallium and OpenGL define it the other way around. * * So let's just negate FACE at the beginning of the shader and rewrite the rest * of the shader to read from the newly allocated temporary. */ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) { unsigned tempregi = rc_find_free_temporary(c); struct rc_instruction *inst_add; struct rc_instruction *inst; /* perspective divide */ inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); inst_add->U.I.Opcode = RC_OPCODE_ADD; inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_add->U.I.DstReg.Index = tempregi; inst_add->U.I.DstReg.WriteMask = RC_MASK_X; inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; inst_add->U.I.SrcReg[1].Index = face; inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; for(i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == face) { inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = tempregi; } } } }
static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned int reg; unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth); for (unsigned i = 0; i < spaces; i++) fprintf(f, " "); fprintf(f, "%s", opcode->Name); switch(inst->U.I.SaturateMode) { case RC_SATURATE_NONE: break; case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; default: fprintf(f, "_BAD_SAT"); break; } if (opcode->HasDstReg) { fprintf(f, " "); rc_print_dst_register(f, inst->U.I.DstReg); print_omod_op(f, inst->U.I.Omod); if (opcode->NumSrcRegs) fprintf(f, ","); } for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { if (reg > 0) fprintf(f, ","); fprintf(f, " "); rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); } if (opcode->HasTexture) { fprintf(f, ", %s%s[%u]%s%s", textarget_to_string(inst->U.I.TexSrcTarget), inst->U.I.TexShadow ? "SHADOW" : "", inst->U.I.TexSrcUnit, inst->U.I.TexSemWait ? " SEM_WAIT" : "", inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : ""); } fprintf(f, ";"); if (inst->U.I.WriteALUResult) { fprintf(f, " [aluresult = ("); rc_print_comparefunc(f, (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", inst->U.I.ALUResultCompare, "0"); fprintf(f, ")]"); } if (inst->U.I.DstReg.Pred == RC_PRED_SET) { fprintf(f, " PRED_SET"); } else if (inst->U.I.DstReg.Pred == RC_PRED_INV) { fprintf(f, " PRED_INV"); } fprintf(f, "\n"); }
/**
 * Call pair_foreach_source_callback() once for every argument of the RGB
 * sub-instruction of @p pair.
 *
 * For each argument a representative swizzle is chosen: the first channel
 * that selects X, Y, Z or W.  If no channel does, the swizzle of the last
 * channel is passed on unchanged.
 */
void rc_pair_foreach_source_that_rgb_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb)
{
	unsigned int i;
	const struct rc_opcode_info * info =
				rc_get_opcode_info(pair->RGB.Opcode);

	for(i = 0; i < info->NumSrcRegs; i++) {
		unsigned int chan;
		unsigned int swz = RC_SWIZZLE_UNUSED;
		/* Find a swizzle that is either X,Y,Z,or W.  We assume here
		 * that if one channel swizzles X,Y, or Z, then none of the
		 * other channels swizzle W, and vice-versa. */
		for(chan = 0; chan < 4; chan++) {
			swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
			/* Stop at the first real component; continuing the
			 * loop would overwrite it with a later channel. */
			if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
			|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
				break;
		}
		pair_foreach_source_callback(pair, data, cb,
					swz,
					pair->RGB.Arg[i].Source);
	}
}
static void transform_negative_addressing(struct r300_vertex_program_compiler *c, struct rc_instruction *arl, struct rc_instruction *end, int min_offset) { struct rc_instruction *inst, *add; unsigned const_swizzle; /* Transform ARL */ add = rc_insert_new_instruction(&c->Base, arl->Prev); add->U.I.Opcode = RC_OPCODE_ADD; add->U.I.DstReg.File = RC_FILE_TEMPORARY; add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); add->U.I.DstReg.WriteMask = RC_MASK_X; add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, min_offset, &const_swizzle); add->U.I.SrcReg[1].Swizzle = const_swizzle; arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; /* Rewrite offsets up to and excluding inst. */ for (inst = arl->Next; inst != end; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (unsigned i = 0; i < opcode->NumSrcRegs; i++) if (inst->U.I.SrcReg[i].RelAddr) inst->U.I.SrcReg[i].Index -= min_offset; } }
/** * R3xx-R4xx vertex engine does not support the Absolute source operand modifier * and the Saturate opcode modifier. Only Absolute is currently transformed. */ static int transform_nonnative_modifiers( struct radeon_compiler *c, struct rc_instruction *inst, void* unused) { const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; /* Transform ABS(a) to MAX(a, -a). */ for (i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].Abs) { struct rc_instruction *new_inst; unsigned temp; inst->U.I.SrcReg[i].Abs = 0; temp = rc_find_free_temporary(c); new_inst = rc_insert_new_instruction(c, inst->Prev); new_inst->U.I.Opcode = RC_OPCODE_MAX; new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; new_inst->U.I.DstReg.Index = temp; new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = temp; inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; } } return 1; }
/** * Rewrite the program such that everything that source the given input * register will source new_input instead. */ void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) { struct rc_instruction * inst; c->Program.InputsRead &= ~(1 << input); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; for(i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { inst->U.I.SrcReg[i].File = new_input.File; inst->U.I.SrcReg[i].Index = new_input.Index; inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); if (!inst->U.I.SrcReg[i].Abs) { inst->U.I.SrcReg[i].Negate ^= new_input.Negate; inst->U.I.SrcReg[i].Abs = new_input.Abs; } c->Program.InputsRead |= 1 << new_input.Index; } } } }
void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) { struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; struct emit_state s; struct r500_fragment_program_code *code = &compiler->code->code.r500; memset(&s, 0, sizeof(s)); s.C = &compiler->Base; s.Code = code; memset(code, 0, sizeof(*code)); code->max_temp_idx = 1; code->inst_end = -1; for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) { if (inst->Type == RC_INSTRUCTION_NORMAL) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->IsFlowControl) { emit_flowcontrol(&s, inst); } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { continue; } else { emit_tex(compiler, &inst->U.I); } } else { emit_paired(compiler, &inst->U.P); } } if (code->max_temp_idx >= compiler->Base.max_temp_regs) rc_error(&compiler->Base, "Too many hardware temporaries used"); if (compiler->Base.Error) return; if (code->inst_end == -1 || (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ if (code->inst_end >= compiler->Base.max_alu_insts-1) { rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); return; } int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } /* Enable full flow control mode if we are using loops or have if * statements nested at least four deep. */ if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { if (code->max_temp_idx < 1) code->max_temp_idx = 1; code->us_fc_ctrl |= R500_FC_FULL_FC_EN; } }
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) { struct rc_instruction * inst; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned int src; for(src = 0; src < opcode->NumSrcRegs; ++src) { struct rc_src_register *reg = &inst->U.I.SrcReg[src]; if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) { continue; } if (!c->is_r500 && c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS && try_rewrite_constant(c, reg)) { continue; } rewrite_source(c, inst, src); } } if (c->Debug & RC_DBG_LOG) rc_constants_print(&c->Program.Constants); }
static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) { struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct rc_instruction *rci; for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { struct rc_sub_instruction * inst = &rci->U.I; unsigned i; const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; if (inst->DstReg.WriteMask & RC_MASK_Z) { inst->DstReg.WriteMask = RC_MASK_W; } else { inst->DstReg.WriteMask = 0; continue; } if (!info->IsComponentwise) { continue; } for (i = 0; i < info->NumSrcRegs; i++) { inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); } } }
/**
 * Return the IsFlowControl flag of the instruction's opcode for normal
 * instructions, and 0 for every other instruction type.
 */
static int is_controlflow(struct rc_instruction * inst)
{
	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		return 0;
	}

	return rc_get_opcode_info(inst->U.I.Opcode)->IsFlowControl;
}
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) { struct rc_instruction * tmp; memset(s, 0, sizeof(*s)); for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; tmp = tmp->Next){ const struct rc_opcode_info * info; rc_for_all_reads_mask(tmp, reg_count_callback, s); if (tmp->Type == RC_INSTRUCTION_NORMAL) { info = rc_get_opcode_info(tmp->U.I.Opcode); if (info->Opcode == RC_OPCODE_BEGIN_TEX) continue; if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) s->num_presub_ops++; } else { if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) s->num_presub_ops++; if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) s->num_presub_ops++; /* Assuming alpha will never be a flow control or * a tex instruction. */ if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) s->num_alpha_insts++; if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) s->num_rgb_insts++; if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) { s->num_omod_ops++; } if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) { s->num_omod_ops++; } info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); } if (info->IsFlowControl) s->num_fc_insts++; if (info->HasTexture) s->num_tex_insts++; s->num_insts++; } /* Increment here because the reg_count_callback store the max * temporary reg index in s->nun_temp_regs. */ s->num_temp_regs++; }
/**
 * Register-write callback that accumulates by how much an instruction
 * changes a counter register.
 *
 * @p data is a struct count_inst describing the counter (Index, Swz) and
 * receiving the result.  Writes to anything other than the counter's
 * temporary channel are ignored.  For a write to the counter, the
 * instruction must be a two-source ADD or SUB combining the counter with
 * an immediate constant; the constant is then added to or subtracted from
 * count_inst->Amount.  Any other form sets count_inst->Unknown to 1.
 */
static void get_incr_amount(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct count_inst * count_inst = data;
	int amnt_src_index;
	const struct rc_opcode_info * opcode;
	float amount;

	if(file != RC_FILE_TEMPORARY ||
	   count_inst->Index != index ||
	   ((1 << GET_SWZ(count_inst->Swz,0)) != mask)){
		return;
	}

	/* Find the index of the counter register. */
	opcode = rc_get_opcode_info(inst->U.I.Opcode);
	if(opcode->NumSrcRegs != 2){
		count_inst->Unknown = 1;
		return;
	}
	if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
	   inst->U.I.SrcReg[0].Index == count_inst->Index &&
	   inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
		amnt_src_index = 1;
	} else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
		   inst->U.I.SrcReg[1].Index == count_inst->Index &&
		   inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
		amnt_src_index = 0;
	}
	else{
		count_inst->Unknown = 1;
		return;
	}

	/* The other operand must be a compile-time constant. */
	if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
							count_inst->C)){
		amount = get_constant_value(count_inst->C,
				&inst->U.I.SrcReg[amnt_src_index], 0);
	}
	else{
		count_inst->Unknown = 1 ;
		return;
	}

	switch(inst->U.I.Opcode){
	case RC_OPCODE_ADD:
		count_inst->Amount += amount;
		break;
	case RC_OPCODE_SUB:
		/* "constant - counter" is not a plain decrement, so the
		 * change cannot be expressed as an amount: flag it like
		 * every other unsupported form. */
		if(amnt_src_index == 0){
			count_inst->Unknown = 1;
			return;
		}
		count_inst->Amount -= amount;
		break;
	default:
		count_inst->Unknown = 1;
		return;
	}
}
/**
 * Report the registers written by a normal instruction to @p cb.
 *
 * The destination register is reported with its write mask when the
 * opcode has a destination and the mask is non-empty.  A write to the
 * ALU result special register is reported when WriteALUResult is set.
 */
static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_sub_instruction * op = &fullinst->U.I;
	const struct rc_opcode_info * info = rc_get_opcode_info(op->Opcode);

	if (info->HasDstReg && op->DstReg.WriteMask) {
		cb(userdata, fullinst,
		   op->DstReg.File, op->DstReg.Index, op->DstReg.WriteMask);
	}

	if (op->WriteALUResult) {
		cb(userdata, fullinst,
		   RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
	}
}
/**
 * Invoke @p cb for every register channel read by a pair instruction.
 *
 * First, the channels referenced through each of the three source slots
 * are collected from the argument swizzles of the RGB and alpha
 * sub-instructions (NOP sub-instructions contribute nothing).  Then, for
 * each slot in use, the referenced RGB channels (0-2) are reported with
 * the RGB source register and channel 3 with the alpha source register.
 */
static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;
	unsigned int slot_mask[3] = { 0, 0, 0 };
	unsigned int arg;
	unsigned int chan;
	unsigned int slot;

	if (pair->RGB.Opcode != RC_OPCODE_NOP) {
		const struct rc_opcode_info * rgb_info = rc_get_opcode_info(pair->RGB.Opcode);

		for (arg = 0; arg < rgb_info->NumSrcRegs; ++arg) {
			for (chan = 0; chan < 3; ++chan) {
				unsigned int swz = GET_SWZ(pair->RGB.Arg[arg].Swizzle, chan);

				/* Values of 4 and above do not name a channel. */
				if (swz < 4) {
					slot_mask[pair->RGB.Arg[arg].Source] |= 1 << swz;
				}
			}
		}
	}

	if (pair->Alpha.Opcode != RC_OPCODE_NOP) {
		const struct rc_opcode_info * alpha_info = rc_get_opcode_info(pair->Alpha.Opcode);

		for (arg = 0; arg < alpha_info->NumSrcRegs; ++arg) {
			if (pair->Alpha.Arg[arg].Swizzle < 4) {
				slot_mask[pair->Alpha.Arg[arg].Source] |= 1 << pair->Alpha.Arg[arg].Swizzle;
			}
		}
	}

	for (slot = 0; slot < 3; ++slot) {
		if (pair->RGB.Src[slot].Used) {
			for (chan = 0; chan < 3; ++chan) {
				if (!GET_BIT(slot_mask[slot], chan)) {
					continue;
				}
				cb(userdata, fullinst,
				   pair->RGB.Src[slot].File, pair->RGB.Src[slot].Index, chan);
			}
		}

		if (pair->Alpha.Src[slot].Used && GET_BIT(slot_mask[slot], 3)) {
			cb(userdata, fullinst,
			   pair->Alpha.Src[slot].File, pair->Alpha.Src[slot].Index, 3);
		}
	}
}
/**
 * Call pair_foreach_source_callback() once for every argument of the
 * alpha sub-instruction of @p pair, passing the first swizzle component
 * of the argument together with its source slot.
 */
void rc_pair_foreach_source_that_alpha_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb)
{
	const struct rc_opcode_info * alpha_info =
				rc_get_opcode_info(pair->Alpha.Opcode);
	unsigned int arg;

	for (arg = 0; arg < alpha_info->NumSrcRegs; arg++) {
		pair_foreach_source_callback(pair, data, cb,
				GET_SWZ(pair->Alpha.Arg[arg].Swizzle, 0),
				pair->Alpha.Arg[arg].Source);
	}
}
/** * Emit a single TEX instruction */ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { PROG_CODE; if (code->inst_end >= c->Base.max_alu_insts-1) { error("emit_tex: Too many instructions"); return 0; } int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX | (inst->DstReg.WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; if (inst->TexSrcTarget == RC_TEXTURE_RECT) code->inst[ip].inst1 |= R500_TEX_UNSCALED; switch (inst->Opcode) { case RC_OPCODE_KIL: code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; break; case RC_OPCODE_TEX: code->inst[ip].inst1 |= R500_TEX_INST_LD; break; case RC_OPCODE_TXB: code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; break; case RC_OPCODE_TXP: code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; default: error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); } use_temporary(code, inst->SrcReg[0].Index); if (inst->Opcode != RC_OPCODE_KIL) use_temporary(code, inst->DstReg.Index); code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) | R500_TEX_DST_ADDR(inst->DstReg.Index) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; return 1; }
/**
 * Report the register channels written by a normal instruction to @p cb.
 *
 * When the opcode has a destination, @p cb is called once for every
 * channel set in the destination write mask.  A write to the ALU result
 * special register is reported on channel 0 when WriteALUResult is set.
 */
static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
{
	struct rc_sub_instruction * op = &fullinst->U.I;
	const struct rc_opcode_info * info = rc_get_opcode_info(op->Opcode);
	unsigned int chan;

	if (info->HasDstReg) {
		for (chan = 0; chan < 4; ++chan) {
			if (!GET_BIT(op->DstReg.WriteMask, chan)) {
				continue;
			}
			cb(userdata, fullinst, op->DstReg.File, op->DstReg.Index, chan);
		}
	}

	if (op->WriteALUResult) {
		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
	}
}
/**
 * Let @p cb remap every register referenced by a normal instruction.
 *
 * The callback receives pointers to a (file, index) pair and may change
 * both; the result is written back to the instruction.  The destination
 * is handled first, then each source.  A source reading the presubtract
 * result causes the presubtract operation's own sources to be remapped
 * instead, and only once per instruction.
 */
static void remap_normal_instruction(struct rc_instruction * fullinst,
		rc_remap_register_fn cb, void * userdata)
{
	struct rc_sub_instruction * op = &fullinst->U.I;
	const struct rc_opcode_info * info = rc_get_opcode_info(op->Opcode);
	unsigned int presub_done = 0;
	unsigned int s;

	if (info->HasDstReg) {
		rc_register_file dst_file = op->DstReg.File;
		unsigned int dst_index = op->DstReg.Index;

		cb(userdata, fullinst, &dst_file, &dst_index);

		op->DstReg.File = dst_file;
		op->DstReg.Index = dst_index;
	}

	for (s = 0; s < info->NumSrcRegs; ++s) {
		rc_register_file file = op->SrcReg[s].File;
		unsigned int index = op->SrcReg[s].Index;
		unsigned int presub_count;
		unsigned int p;

		if (file != RC_FILE_PRESUB) {
			cb(userdata, fullinst, &file, &index);
			op->SrcReg[s].File = file;
			op->SrcReg[s].Index = index;
			continue;
		}

		presub_count = rc_presubtract_src_reg_count(op->PreSub.Opcode);

		/* Make sure we only remap presubtract sources once in
		 * case more than one source register reads the
		 * presubtract result. */
		if (presub_done) {
			continue;
		}

		for (p = 0; p < presub_count; p++) {
			file = op->PreSub.SrcReg[p].File;
			index = op->PreSub.SrcReg[p].Index;

			cb(userdata, fullinst, &file, &index);

			op->PreSub.SrcReg[p].File = file;
			op->PreSub.SrcReg[p].Index = index;
		}
		presub_done = 1;
	}
}
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) { unsigned int unit; unsigned int dest; unsigned int opcode; PROG_CODE; if (code->tex.length >= emit->compiler->Base.max_tex_insts) { error("Too many TEX instructions"); return 0; } unit = inst->U.I.TexSrcUnit; dest = inst->U.I.DstReg.Index; switch(inst->U.I.Opcode) { case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; default: error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); return 0; } if (inst->U.I.Opcode == RC_OPCODE_KIL) { unit = 0; dest = 0; } else { use_temporary(code, dest); } use_temporary(code, inst->U.I.SrcReg[0].Index); code->tex.inst[code->tex.length++] = ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) & R300_SRC_ADDR_MASK) | ((dest << R300_DST_ADDR_SHIFT) & R300_DST_ADDR_MASK) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT) | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? R400_SRC_ADDR_EXT_BIT : 0) | (dest >= R300_PFS_NUM_TEMP_REGS ? R400_DST_ADDR_EXT_BIT : 0) ; return 1; }
/**
 * Call @p cb for every (argument, source) pair read by one half of a pair
 * instruction.
 *
 * Arguments whose swizzle yields RC_SOURCE_NONE are skipped.  An argument
 * that reads the presubtract slot is expanded into one call per register
 * feeding the presubtract operation, taken from the RGB source array if
 * the argument reads RGB channels and from the alpha array otherwise.
 * Every other argument is resolved through rc_pair_get_src() and reported
 * if a source was found.
 */
static void pair_sub_for_all_args(
	struct rc_instruction * fullinst,
	struct rc_pair_sub_instruction * sub,
	rc_pair_read_arg_fn cb,
	void * userdata)
{
	int i;
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);

	for (i = 0; i < info->NumSrcRegs; i++) {
		struct rc_pair_instruction_source * sources;
		unsigned int presub_op;
		unsigned int presub_count;
		unsigned int j;
		unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle);

		if (src_type == RC_SOURCE_NONE) {
			continue;
		}

		if (sub->Arg[i].Source != RC_PAIR_PRESUB_SRC) {
			struct rc_pair_instruction_source * src =
				rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]);

			if (src) {
				cb(userdata, fullinst, &sub->Arg[i], src);
			}
			continue;
		}

		/* Presubtract: the slot's Index field holds the presubtract
		 * operation, which determines how many sources feed it. */
		if (src_type & RC_SOURCE_RGB) {
			sources = fullinst->U.P.RGB.Src;
		} else {
			sources = fullinst->U.P.Alpha.Src;
		}
		presub_op = sources[RC_PAIR_PRESUB_SRC].Index;
		presub_count = rc_presubtract_src_reg_count(presub_op);

		for (j = 0; j < presub_count; j++) {
			cb(userdata, fullinst, &sub->Arg[i], &sources[j]);
		}
	}
}
static void mark_used( struct rc_instruction * inst, struct rc_pair_sub_instruction * sub) { unsigned int i; const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); for (i = 0; i < info->NumSrcRegs; i++) { unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); if (src_type & RC_SOURCE_RGB) { inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; } if (src_type & RC_SOURCE_ALPHA) { inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; } } }
/**
 * Return 1 if the destination of @p inst is a temporary that none of the
 * instruction's own sources reads, 0 otherwise.
 *
 * The opcode is asserted to have a destination register.
 */
static int is_dst_safe_to_reuse(struct rc_instruction *inst)
{
	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned s;

	assert(info->HasDstReg);

	if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
		return 0;
	}

	for (s = 0; s < info->NumSrcRegs; s++) {
		const int reads_dst =
			inst->U.I.SrcReg[s].File == RC_FILE_TEMPORARY &&
			inst->U.I.SrcReg[s].Index == inst->U.I.DstReg.Index;

		if (reads_dst) {
			return 0;
		}
	}

	return 1;
}
/** * Rewrite the program such that everything that writes into the given * output register will instead write to new_output. The new_output * writemask is honoured. */ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) { struct rc_instruction * inst; c->Program.OutputsWritten &= ~(1 << output); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->HasDstReg) { if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { inst->U.I.DstReg.Index = new_output; inst->U.I.DstReg.WriteMask &= writemask; c->Program.OutputsWritten |= 1 << new_output; } } } }
/**
 * Map a compiler opcode to the corresponding R300 RGB ALU output opcode.
 *
 * Opcodes without a mapping report an error through error() and are
 * encoded as MAD, the same encoding that is used for NOP.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R300_ALU_OUTC_MAD;
	case RC_OPCODE_CMP:
		return R300_ALU_OUTC_CMP;
	case RC_OPCODE_CND:
		return R300_ALU_OUTC_CND;
	case RC_OPCODE_DP3:
		return R300_ALU_OUTC_DP3;
	case RC_OPCODE_DP4:
		return R300_ALU_OUTC_DP4;
	case RC_OPCODE_FRC:
		return R300_ALU_OUTC_FRC;
	case RC_OPCODE_MAX:
		return R300_ALU_OUTC_MAX;
	case RC_OPCODE_MIN:
		return R300_ALU_OUTC_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R300_ALU_OUTC_REPL_ALPHA;
	default:
		break;
	}

	/* Unknown opcode: report it, then fall back to MAD. */
	error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
	return R300_ALU_OUTC_MAD;
}
/** * Translate all ALU instructions into corresponding pair instructions, * performing no other changes. */ void rc_pair_translate(struct r300_fragment_program_compiler *c) { for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { if (inst->Type != RC_INSTRUCTION_NORMAL) continue; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) continue; struct rc_sub_instruction copy = inst->U.I; final_rewrite(©); inst->Type = RC_INSTRUCTION_PAIR; set_pair_instruction(c, &inst->U.P, ©); } }
/**
 * Map a compiler opcode to the corresponding R500 RGB ALU opcode.
 *
 * Opcodes without a mapping report an error through error() and are
 * encoded as MAD, the same encoding that is used for NOP.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALU_RGBA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALU_RGBA_OP_CMP;
	case RC_OPCODE_DDX:
		return R500_ALU_RGBA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALU_RGBA_OP_MDV;
	case RC_OPCODE_DP3:
		return R500_ALU_RGBA_OP_DP3;
	case RC_OPCODE_DP4:
		return R500_ALU_RGBA_OP_DP4;
	case RC_OPCODE_FRC:
		return R500_ALU_RGBA_OP_FRC;
	case RC_OPCODE_MAX:
		return R500_ALU_RGBA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALU_RGBA_OP_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R500_ALU_RGBA_OP_SOP;
	default:
		break;
	}

	/* Unknown opcode: report it, then fall back to MAD. */
	error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALU_RGBA_OP_MAD;
}