static void projective_divide(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst) { struct rc_instruction *inst_mul, *inst_rcp; unsigned temp = rc_find_free_temporary(&compiler->Base); inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->U.I.DstReg.Index = temp; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; /* Because the input can be arbitrarily swizzled, * read the component mapped to W. */ inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = temp; inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mul->U.I.SrcReg[1].Index = temp; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.Opcode = RC_OPCODE_TEX; inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; }
/** * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. * Gallium and OpenGL define it the other way around. * * So let's just negate FACE at the beginning of the shader and rewrite the rest * of the shader to read from the newly allocated temporary. */ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) { unsigned tempregi = rc_find_free_temporary(c); struct rc_instruction *inst_add; struct rc_instruction *inst; /* perspective divide */ inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); inst_add->U.I.Opcode = RC_OPCODE_ADD; inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_add->U.I.DstReg.Index = tempregi; inst_add->U.I.DstReg.WriteMask = RC_MASK_X; inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; inst_add->U.I.SrcReg[1].Index = face; inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; for(i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == face) { inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = tempregi; } } } }
/** * R3xx-R4xx vertex engine does not support the Absolute source operand modifier * and the Saturate opcode modifier. Only Absolute is currently transformed. */ static int transform_nonnative_modifiers( struct radeon_compiler *c, struct rc_instruction *inst, void* unused) { const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; /* Transform ABS(a) to MAX(a, -a). */ for (i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].Abs) { struct rc_instruction *new_inst; unsigned temp; inst->U.I.SrcReg[i].Abs = 0; temp = rc_find_free_temporary(c); new_inst = rc_insert_new_instruction(c, inst->Prev); new_inst->U.I.Opcode = RC_OPCODE_MAX; new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; new_inst->U.I.DstReg.Index = temp; new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = temp; inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; } } return 1; }
static void transform_negative_addressing(struct r300_vertex_program_compiler *c, struct rc_instruction *arl, struct rc_instruction *end, int min_offset) { struct rc_instruction *inst, *add; unsigned const_swizzle; /* Transform ARL */ add = rc_insert_new_instruction(&c->Base, arl->Prev); add->U.I.Opcode = RC_OPCODE_ADD; add->U.I.DstReg.File = RC_FILE_TEMPORARY; add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); add->U.I.DstReg.WriteMask = RC_MASK_X; add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, min_offset, &const_swizzle); add->U.I.SrcReg[1].Swizzle = const_swizzle; arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; /* Rewrite offsets up to and excluding inst. */ for (inst = arl->Next; inst != end; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (unsigned i = 0; i < opcode->NumSrcRegs; i++) if (inst->U.I.SrcReg[i].RelAddr) inst->U.I.SrcReg[i].Index -= min_offset; } }
static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src) { struct rc_instruction * dst; int i; if (src->Instruction.Opcode == TGSI_OPCODE_END) return; dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); dst->U.I.Opcode = translate_opcode(src->Instruction.Opcode); dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate); if (src->Instruction.NumDstRegs) transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]); for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; else transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]); } /* Texturing. */ transform_texture(dst, src->InstructionExtTexture); }
static void lower_texture_rect(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst) { struct rc_instruction *inst_rect; unsigned temp = rc_find_free_temporary(&compiler->Base); if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) { inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev); inst_rect->U.I.Opcode = RC_OPCODE_MUL; inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rect->U.I.DstReg.Index = temp; inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_rect->U.I.SrcReg[1].Index = rc_constants_add_state(&compiler->Base.Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; inst->U.I.TexSrcTarget = RC_TEXTURE_2D; } }
/**
 * Append an instruction, given in textual form, to the end of the program.
 *
 * \param c            compiler whose program receives the instruction
 * \param inst_string  text handed to parse_rc_normal_instruction()
 */
void add_instruction(struct radeon_compiler *c, const char * inst_string)
{
	struct rc_instruction *tail = c->Program.Instructions.Prev;
	struct rc_instruction *appended = rc_insert_new_instruction(c, tail);

	parse_rc_normal_instruction(appended, inst_string);
}
static void rewrite_source(struct radeon_compiler * c, struct rc_instruction * inst, unsigned src) { struct rc_swizzle_split split; unsigned int tempreg = rc_find_free_temporary(c); unsigned int usemask; usemask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) usemask |= 1 << chan; } c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); unsigned int phase_refmask; unsigned int masked_negate; mov->U.I.Opcode = RC_OPCODE_MOV; mov->U.I.DstReg.File = RC_FILE_TEMPORARY; mov->U.I.DstReg.Index = tempreg; mov->U.I.DstReg.WriteMask = split.Phase[phase]; mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; mov->U.I.PreSub = inst->U.I.PreSub; phase_refmask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { if (!GET_BIT(split.Phase[phase], chan)) SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); else phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); } phase_refmask &= RC_MASK_XYZW; masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; if (masked_negate == 0) mov->U.I.SrcReg[0].Negate = 0; else if (masked_negate == split.Phase[phase]) mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; } inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[src].Index = tempreg; inst->U.I.SrcReg[src].Swizzle = 0; inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; inst->U.I.SrcReg[src].Abs = 0; for(unsigned int chan = 0; chan < 4; ++chan) { SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); } }
/** * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. */ static int transform_source_conflicts( struct radeon_compiler *c, struct rc_instruction* inst, void* unused) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->NumSrcRegs == 3) { if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = tmpreg; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; reset_srcreg(&inst->U.I.SrcReg[2]); inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[2].Index = tmpreg; } } if (opcode->NumSrcRegs >= 2) { if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = tmpreg; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; reset_srcreg(&inst->U.I.SrcReg[1]); inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[1].Index = tmpreg; } } return 1; }
/**
 * Insert a new one-source instruction after \p after.
 *
 * \param c         compiler owning the instruction list
 * \param after     instruction the new one is inserted behind
 * \param Opcode    opcode of the new instruction
 * \param Saturate  saturate mode of the new instruction
 * \param DstReg    destination register
 * \param SrcReg    the single source register
 * \return the newly inserted instruction
 */
static struct rc_instruction *emit1(
	struct radeon_compiler * c, struct rc_instruction * after,
	rc_opcode Opcode, rc_saturate_mode Saturate,
	struct rc_dst_register DstReg,
	struct rc_src_register SrcReg)
{
	struct rc_instruction *emitted = rc_insert_new_instruction(c, after);

	emitted->U.I.Opcode       = Opcode;
	emitted->U.I.SaturateMode = Saturate;
	emitted->U.I.DstReg       = DstReg;
	emitted->U.I.SrcReg[0]    = SrcReg;

	return emitted;
}
/** * The NV_vertex_program spec mandates that all registers be * initialized to zero. We do this here unconditionally. * * \note We rely on dead-code elimination in the compiler. */ static void initialize_NV_registers(struct radeon_compiler * compiler) { unsigned int reg; struct rc_instruction * inst; for(reg = 0; reg < 12; ++reg) { inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = reg; inst->U.I.SrcReg[0].File = RC_FILE_NONE; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; } inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); inst->U.I.Opcode = RC_OPCODE_ARL; inst->U.I.DstReg.File = RC_FILE_ADDRESS; inst->U.I.DstReg.Index = 0; inst->U.I.DstReg.WriteMask = WRITEMASK_X; inst->U.I.SrcReg[0].File = RC_FILE_NONE; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; }
/** * Rewrite the program such that a given output is duplicated. */ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) { unsigned tempreg = rc_find_free_temporary(c); struct rc_instruction * inst; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->HasDstReg) { if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tempreg; } } } inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = output; inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = tempreg; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = dup_output; inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = tempreg; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; c->Program.OutputsWritten |= 1 << dup_output; }
/**
 * Insert a new one-source instruction after \p after.
 *
 * When \p base is non-NULL the whole sub-instruction is first copied from
 * it; opcode, destination and source 0 are then overwritten with the
 * explicit arguments.
 *
 * \param c       compiler owning the instruction list
 * \param after   instruction the new one is inserted behind
 * \param Opcode  opcode of the new instruction
 * \param base    optional template for all remaining fields, may be NULL
 * \param DstReg  destination register
 * \param SrcReg  the single source register
 * \return the newly inserted instruction
 */
static struct rc_instruction *emit1(
	struct radeon_compiler * c, struct rc_instruction * after,
	rc_opcode Opcode, struct rc_sub_instruction * base,
	struct rc_dst_register DstReg, struct rc_src_register SrcReg)
{
	struct rc_instruction *emitted = rc_insert_new_instruction(c, after);

	if (base != 0)
		memcpy(&emitted->U.I, base, sizeof(struct rc_sub_instruction));

	emitted->U.I.Opcode    = Opcode;
	emitted->U.I.DstReg    = DstReg;
	emitted->U.I.SrcReg[0] = SrcReg;

	return emitted;
}
/** * Emit all ready texture instructions in a single block. * * Emit as a single block to (hopefully) sample many textures in parallel, * and to avoid hardware indirections on R300. */ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) { struct schedule_instruction *readytex; struct rc_instruction * inst_begin; assert(s->ReadyTEX); notify_sem_wait(s); /* Node marker for R300 */ inst_begin = rc_insert_new_instruction(s->C, before->Prev); inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; /* Link texture instructions back in */ readytex = s->ReadyTEX; while(readytex) { rc_insert_instruction(before->Prev, readytex->Instruction); DBG("%i: commit TEX reads\n", readytex->Instruction->IP); /* All of the TEX instructions in the same TEX block have * their source registers read from before any of the * instructions in that block write to their destination * registers. This means that when we commit a TEX * instruction, any other TEX instruction that wants to write * to one of the committed instruction's source register can be * marked as ready and should be emitted in the same TEX * block. This prevents the following sequence from being * emitted in two different TEX blocks: * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; */ commit_update_reads(s, readytex); readytex = readytex->NextReady; } readytex = s->ReadyTEX; s->ReadyTEX = 0; while(readytex){ DBG("%i: commit TEX writes\n", readytex->Instruction->IP); commit_update_writes(s, readytex); /* Set semaphore bits for last TEX instruction in the block */ if (!readytex->NextReady) { readytex->Instruction->U.I.TexSemAcquire = 1; readytex->Instruction->U.I.TexSemWait = 1; } rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex)); readytex = readytex->NextReady; } }
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) { int i; for(i = 0; i < 32; ++i) { if ((compiler->RequiredOutputs & (1 << i)) && !(compiler->Base.Program.OutputsWritten & (1 << i))) { struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = i; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; inst->U.I.SrcReg[0].Index = 0; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; compiler->Base.Program.OutputsWritten |= 1 << i; } } }
static void scale_texcoords(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst, unsigned state_constant) { struct rc_instruction *inst_mov; unsigned temp = rc_find_free_temporary(&compiler->Base); inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MUL; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mov->U.I.SrcReg[1].Index = rc_constants_add_state(&compiler->Base.Program.Constants, state_constant, inst->U.I.TexSrcUnit); reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; }
/** * Introduce standard code fragment to deal with fragment.position. */ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, int full_vtransform) { unsigned tempregi = rc_find_free_temporary(c); struct rc_instruction * inst_rcp; struct rc_instruction * inst_mul; struct rc_instruction * inst_mad; struct rc_instruction * inst; c->Program.InputsRead &= ~(1 << wpos); c->Program.InputsRead |= 1 << new_input; /* perspective divide */ inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->U.I.DstReg.Index = tempregi; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; inst_rcp->U.I.SrcReg[0].Index = new_input; inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; inst_mul = rc_insert_new_instruction(c, inst_rcp); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = tempregi; inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; inst_mul->U.I.SrcReg[0].Index = new_input; inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mul->U.I.SrcReg[1].Index = tempregi; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; /* viewport transformation */ inst_mad = rc_insert_new_instruction(c, inst_mul); inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = tempregi; inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[0].Index = tempregi; inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; if (full_vtransform) { inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, 
RC_STATE_R300_VIEWPORT_SCALE, 0); inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); } else { inst_mad->U.I.SrcReg[1].Index = inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); } for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; for(i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == wpos) { inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[i].Index = tempregi; } } } }
/**
 * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
 *  - implement texture compare (shadow extensions)
 *  - extract non-native source / destination operands
 *  - premultiply texture coordinates for RECT
 *  - extract operand swizzles
 *  - introduce a temporary register when write masks are needed
 *
 * \param c     compiler whose instruction list is rewritten
 * \param inst  instruction to examine; it is modified in place and helper
 *              instructions are inserted before and after it
 * \param data  the struct r300_fragment_program_compiler, passed as void*
 * \return 0 if \p inst is not TEX/TXB/TXP/KIL (nothing is changed),
 *         1 otherwise
 */
int radeonTransformTEX(
	struct radeon_compiler * c,
	struct rc_instruction * inst,
	void* data)
{
	struct r300_fragment_program_compiler *compiler =
		(struct r300_fragment_program_compiler*)data;

	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
	    inst->U.I.Opcode != RC_OPCODE_TXB &&
	    inst->U.I.Opcode != RC_OPCODE_TXP &&
	    inst->U.I.Opcode != RC_OPCODE_KIL)
		return 0;

	/* ARB_shadow & EXT_shadow_funcs */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
	     (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;

		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
			/* The result is constant, so no sampling is needed:
			 * the lookup degenerates into a MOV. */
			inst->U.I.Opcode = RC_OPCODE_MOV;

			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
				inst->U.I.SrcReg[0].File = RC_FILE_NONE;
				inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
			} else {
				inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
			}

			return 1;
		} else {
			/* NOTE(review): this shadows the outer comparefunc; both
			 * are initialized from the same state field. */
			rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
			struct rc_instruction * inst_rcp = NULL;
			struct rc_instruction * inst_mad;
			struct rc_instruction * inst_cmp;
			unsigned tmp_texsample;
			unsigned tmp_sum;
			unsigned tmp_recip_w = 0;
			/* pass/fail/tex are source-operand indices (1 or 2). */
			int pass, fail, tex;

			/* Save the output register. */
			struct rc_dst_register output_reg = inst->U.I.DstReg;

			/* Redirect TEX to a new temp. */
			tmp_texsample = rc_find_free_temporary(c);
			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst->U.I.DstReg.Index = tmp_texsample;
			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;

			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				tmp_recip_w = rc_find_free_temporary(c);

				/* Compute 1/W. */
				inst_rcp = rc_insert_new_instruction(c, inst);
				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_rcp->U.I.DstReg.Index = tmp_recip_w;
				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_rcp->U.I.SrcReg[0].Swizzle =
					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
			}

			/* Perspective-divide Z by W (if it's TXP) and add the texture sample (see below). */
			tmp_sum = rc_find_free_temporary(c);
			inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mad->U.I.DstReg.Index = tmp_sum;
			inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mad->U.I.SrcReg[0].Swizzle =
				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				/* MAD: z * (1/w) + tex; the sample is source 2. */
				inst_mad->U.I.Opcode = RC_OPCODE_MAD;
				inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
				inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
				inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
				tex = 2;
			} else {
				/* ADD: z + tex; the sample is source 1. */
				inst_mad->U.I.Opcode = RC_OPCODE_ADD;
				tex = 1;
			}
			inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
			inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
			inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;

			/* Fake EQUAL/NOTEQUAL, it seems to pass some tests surprisingly. */
			if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
				comparefunc = RC_COMPARE_FUNC_GEQUAL;
			} else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
				comparefunc = RC_COMPARE_FUNC_LESS;
			}

			/* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
			 *   LESS:    r  < tex  <=>      -tex+r < 0
			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
			 *   GREATER: r  > tex  <=>       tex-r < 0
			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
			 *
			 * This negates either r or tex: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
				inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
			else
				inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;

			/* This negates the whole expression: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
				pass = 1;
				fail = 2;
			} else {
				pass = 2;
				fail = 1;
			}

			/* CMP writes the saved output register: the "pass" source
			 * is the constant 1, the "fail" source is the value from
			 * shadow_ambient(). */
			inst_cmp = rc_insert_new_instruction(c, inst_mad);
			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
			inst_cmp->U.I.DstReg = output_reg;
			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
			inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
			inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
			inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);

			assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w);
		}
	}

	/* Divide by W if needed. */
	if (inst->U.I.Opcode == RC_OPCODE_TXP &&
	    (compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode == RC_WRAP_REPEAT ||
	     compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode == RC_WRAP_MIRRORED_REPEAT)) {
		projective_divide(compiler, inst);
	}

	/* Texture wrap modes don't work on NPOT textures or texrects.
	 *
	 * The game plan is simple. We have two flags, fake_npot and
	 * non_normalized_coords, as well as a tex target. The RECT tex target
	 * will make the emitted code use non-scaled texcoords.
	 *
	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
	 * mirroring are not. If we need to repeat, we do:
	 *
	 * MUL temp, texcoord, <scaling factor constant>
	 * FRC temp, temp ; Discard integer portion of coords
	 *
	 * This gives us coords in [0, 1].
	 *
	 * Mirroring is trickier. We're going to start out like repeat:
	 *
	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
	 *                     ; so scale to [0, 1]
	 * FRC temp, temp ; Make the pattern repeat
	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
	 *                         ; The pattern is backwards, so reverse it (1-x).
	 *
	 * This gives us coords in [0, 1].
	 *
	 * ~ C & M. ;)
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
	     compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
	     compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
		rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;

		/* R300 cannot sample from rectangles. */
		if (!c->is_r500) {
			lower_texture_rect(compiler, inst);
		}

		if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
		    wrapmode != RC_WRAP_NONE) {
			struct rc_instruction *inst_mov;
			unsigned temp = rc_find_free_temporary(c);

			/* For NPOT fallback, we need normalized coordinates anyway. */
			if (c->is_r500) {
				lower_texture_rect(compiler, inst);
			}

			if (wrapmode == RC_WRAP_REPEAT) {
				/* Both instructions will be paired up. */
				struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);

				inst_frc->U.I.Opcode = RC_OPCODE_FRC;
				inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_frc->U.I.DstReg.Index = temp;
				inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
				/*
				 * Function:
				 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
				 *
				 * Code:
				 *   MUL temp, src0, 0.5
				 *   FRC temp, temp
				 *   MAD temp, temp, 2, -1
				 *   ADD temp, 1, -abs(temp)
				 */

				struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
				unsigned two, two_swizzle;

				/* MUL temp.xyz, src0, 0.5 (HHHH swizzle on source 1) */
				inst_mul = rc_insert_new_instruction(c, inst->Prev);

				inst_mul->U.I.Opcode = RC_OPCODE_MUL;
				inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_mul->U.I.DstReg.Index = temp;
				inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;

				/* FRC temp.xyz, temp */
				inst_frc = rc_insert_new_instruction(c, inst->Prev);

				inst_frc->U.I.Opcode = RC_OPCODE_FRC;
				inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_frc->U.I.DstReg.Index = temp;
				inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
				inst_frc->U.I.SrcReg[0].Index = temp;
				inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

				two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
				/* MAD temp.xyz, temp, 2, -1 */
				inst_mad = rc_insert_new_instruction(c, inst->Prev);

				inst_mad->U.I.Opcode = RC_OPCODE_MAD;
				inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_mad->U.I.DstReg.Index = temp;
				inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
				inst_mad->U.I.SrcReg[0].Index = temp;
				inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
				inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
				inst_mad->U.I.SrcReg[1].Index = two;
				inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
				inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
				inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;

				/* ADD temp.xyz, 1, -abs(temp) */
				inst_add = rc_insert_new_instruction(c, inst->Prev);

				inst_add->U.I.Opcode = RC_OPCODE_ADD;
				inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_add->U.I.DstReg.Index = temp;
				inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
				inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
				inst_add->U.I.SrcReg[1].Index = temp;
				inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
				inst_add->U.I.SrcReg[1].Abs = 1;
				inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
			} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
				/*
				 * Mirrored clamp modes are bloody simple, we just use abs
				 * to fold negative coordinates onto the positive range.
				 * This works for all modes i.e. CLAMP, CLAMP_TO_EDGE,
				 * and CLAMP_TO_BORDER.
				 */
				/* NOTE(review): this declaration shadows the inst_mov of
				 * the enclosing block. */
				struct rc_instruction *inst_mov;

				inst_mov = rc_insert_new_instruction(c, inst->Prev);

				inst_mov->U.I.Opcode = RC_OPCODE_MOV;
				inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_mov->U.I.DstReg.Index = temp;
				inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
				inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_mov->U.I.SrcReg[0].Abs = 1;
			}

			/* Preserve W for TXP/TXB. */
			inst_mov = rc_insert_new_instruction(c, inst->Prev);

			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = temp;
			inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

			/* The lookup now reads the wrapped coordinates. */
			reset_srcreg(&inst->U.I.SrcReg[0]);
			inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst->U.I.SrcReg[0].Index = temp;
		}
	}

	/* Cannot write texture to output registers (all chips) or with masks (non-r500) */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
	     (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
		/* A MOV placed after the lookup takes over the original
		 * destination; the lookup writes a full temporary instead. */
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg = inst->U.I.DstReg;
		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);

		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot read texture coordinate from constants file */
	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
		/* A MOV placed before the lookup copies the coordinate into a
		 * temporary that the lookup then reads. */
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
	}

	return 1;
}
/** * Transform TEX, TXP, TXB, and KIL instructions in the following ways: * - implement texture compare (shadow extensions) * - extract non-native source / destination operands * - premultiply texture coordinates for RECT * - extract operand swizzles * - introduce a temporary register when write masks are needed */ int radeonTransformTEX( struct radeon_compiler * c, struct rc_instruction * inst, void* data) { struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords; if (inst->U.I.Opcode != RC_OPCODE_TEX && inst->U.I.Opcode != RC_OPCODE_TXB && inst->U.I.Opcode != RC_OPCODE_TXP && inst->U.I.Opcode != RC_OPCODE_TXD && inst->U.I.Opcode != RC_OPCODE_TXL && inst->U.I.Opcode != RC_OPCODE_KIL) return 0; /* ARB_shadow & EXT_shadow_funcs */ if (inst->U.I.Opcode != RC_OPCODE_KIL && ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { inst->U.I.Opcode = RC_OPCODE_MOV; if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); } else { inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); } return 1; } else { struct rc_instruction * inst_rcp = NULL; struct rc_instruction *inst_mul, *inst_add, *inst_cmp; unsigned tmp_texsample; unsigned tmp_sum; int pass, fail; /* Save the output register. */ struct rc_dst_register output_reg = inst->U.I.DstReg; unsigned saturate_mode = inst->U.I.SaturateMode; /* Redirect TEX to a new temp. 
*/ tmp_texsample = rc_find_free_temporary(c); inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; tmp_sum = rc_find_free_temporary(c); if (inst->U.I.Opcode == RC_OPCODE_TXP) { /* Compute 1/W. */ inst_rcp = rc_insert_new_instruction(c, inst); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->U.I.DstReg.Index = tmp_sum; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); } /* Divide Z by W (if it's TXP) and saturate. */ inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = tmp_sum; inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mul->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); if (inst->U.I.Opcode == RC_OPCODE_TXP) { inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mul->U.I.SrcReg[1].Index = tmp_sum; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; } /* Add the depth texture value. 
*/ inst_add = rc_insert_new_instruction(c, inst_mul); inst_add->U.I.Opcode = RC_OPCODE_ADD; inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_add->U.I.DstReg.Index = tmp_sum; inst_add->U.I.DstReg.WriteMask = RC_MASK_W; inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_add->U.I.SrcReg[0].Index = tmp_sum; inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_add->U.I.SrcReg[1].Index = tmp_texsample; inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; /* Note that SrcReg[0] is r, SrcReg[1] is tex and: * LESS: r < tex <=> -tex+r < 0 * GEQUAL: r >= tex <=> not (-tex+r < 0) * GREATER: r > tex <=> tex-r < 0 * LEQUAL: r <= tex <=> not ( tex-r < 0) * EQUAL: GEQUAL * NOTEQUAL:LESS */ /* This negates either r or tex: */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; else inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; /* This negates the whole expresion: */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { pass = 1; fail = 2; } else { pass = 2; fail = 1; } inst_cmp = rc_insert_new_instruction(c, inst_add); inst_cmp->U.I.Opcode = RC_OPCODE_CMP; inst_cmp->U.I.SaturateMode = saturate_mode; inst_cmp->U.I.DstReg = output_reg; inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cmp->U.I.SrcReg[0].Index = tmp_sum; inst_cmp->U.I.SrcReg[0].Swizzle = combine_swizzles(RC_SWIZZLE_WWWW, compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); assert(tmp_texsample != tmp_sum); } } /* R300 cannot sample from rectangles and the wrap mode fallback needs * normalized 
coordinates anyway. */ if (inst->U.I.Opcode != RC_OPCODE_KIL && is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); inst->U.I.TexSrcTarget = RC_TEXTURE_2D; } /* Divide by W if needed. */ if (inst->U.I.Opcode == RC_OPCODE_TXP && (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { projective_divide(compiler, inst); } /* Texture wrap modes don't work on NPOT textures. * * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and * mirroring are not. If we need to repeat, we do: * * MUL temp, texcoord, <scaling factor constant> * FRC temp, temp ; Discard integer portion of coords * * This gives us coords in [0, 1]. * * Mirroring is trickier. We're going to start out like repeat: * * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] * ; so scale to [0, 1] * FRC temp, temp ; Make the pattern repeat * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. * ; The pattern is backwards, so reverse it (1-x). * * This gives us coords in [0, 1]. * * ~ C & M. ;) */ if (inst->U.I.Opcode != RC_OPCODE_KIL && wrapmode != RC_WRAP_NONE) { struct rc_instruction *inst_mov; unsigned temp = rc_find_free_temporary(c); if (wrapmode == RC_WRAP_REPEAT) { /* Both instructions will be paired up. 
*/ struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); inst_frc->U.I.Opcode = RC_OPCODE_FRC; inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_frc->U.I.DstReg.Index = temp; inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { /* * Function: * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) * * Code: * MUL temp, src0, 0.5 * FRC temp, temp * MAD temp, temp, 2, -1 * ADD temp, 1, -abs(temp) */ struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; unsigned two, two_swizzle; inst_mul = rc_insert_new_instruction(c, inst->Prev); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = temp; inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; inst_frc = rc_insert_new_instruction(c, inst->Prev); inst_frc->U.I.Opcode = RC_OPCODE_FRC; inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_frc->U.I.DstReg.Index = temp; inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_frc->U.I.SrcReg[0].Index = temp; inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); inst_mad = rc_insert_new_instruction(c, inst->Prev); inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = temp; inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[0].Index = temp; inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mad->U.I.SrcReg[1].Index = two; inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; inst_add = rc_insert_new_instruction(c, inst->Prev); 
inst_add->U.I.Opcode = RC_OPCODE_ADD; inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_add->U.I.DstReg.Index = temp; inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_add->U.I.SrcReg[1].Index = temp; inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; inst_add->U.I.SrcReg[1].Abs = 1; inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { /* * Mirrored clamp modes are bloody simple, we just use abs * to mirror [0, 1] into [-1, 0]. This works for * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. */ struct rc_instruction *inst_mov; inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mov->U.I.SrcReg[0].Abs = 1; } /* Preserve W for TXP/TXB. */ inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; } /* NPOT -> POT conversion for 3D textures. */ if (inst->U.I.Opcode != RC_OPCODE_KIL && compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { struct rc_instruction *inst_mov; unsigned temp = rc_find_free_temporary(c); /* Saturate XYZ. */ inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; /* Copy W. 
*/ inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = temp; inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = temp; scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); } /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2 */ if (inst->U.I.Opcode != RC_OPCODE_KIL && compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { unsigned two, two_swizzle; struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle); inst_mul = rc_insert_new_instruction(c, inst); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */ inst_mul->U.I.SrcReg[1].Index = two; inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; inst_mad = rc_insert_new_instruction(c, inst_mul); inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */ inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; inst_cnd = rc_insert_new_instruction(c, inst_mad); inst_cnd->U.I.Opcode = RC_OPCODE_CND; inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; inst_cnd->U.I.DstReg = inst->U.I.DstReg; inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cnd->U.I.SrcReg[0].Index = 
inst_mad->U.I.DstReg.Index; inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot write texture to output registers or with saturate (all chips), * or with masks (non-r500). */ if (inst->U.I.Opcode != RC_OPCODE_KIL && (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.SaturateMode || (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; inst_mov->U.I.DstReg = inst->U.I.DstReg; inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; reset_srcreg(&inst->U.I.SrcReg[0]); inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } return 1; }