/**
 * Walk the program forward from instruction 'start' looking for the next
 * reference to TEMP[index].
 *
 * \return READ or WRITE for the first instruction that reads or writes the
 *         temporary (sources are checked before the destination), FLOW if
 *         a loop or subroutine boundary is reached first, or END when the
 *         instruction list is exhausted.
 */
static enum temp_use
find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
{
   GLuint pc;

   for (pc = start; pc < prog->NumInstructions; pc++) {
      const struct prog_instruction *cur = &prog->Instructions[pc];
      GLuint nsrc, s;

      if (cur->Opcode == OPCODE_BGNLOOP ||
          cur->Opcode == OPCODE_ENDLOOP ||
          cur->Opcode == OPCODE_BGNSUB ||
          cur->Opcode == OPCODE_ENDSUB) {
         /* we can't look past loop/subroutine boundaries */
         return FLOW;
      }

      nsrc = _mesa_num_inst_src_regs(cur->Opcode);
      for (s = 0; s < nsrc; s++) {
         if (cur->SrcReg[s].File == PROGRAM_TEMPORARY &&
             cur->SrcReg[s].Index == index) {
            return READ;
         }
      }

      if (cur->DstReg.File == PROGRAM_TEMPORARY &&
          cur->DstReg.Index == index) {
         return WRITE;
      }
   }

   return END;
}
/**
 * Number of source arguments a pair instruction takes for 'opcode'.
 * OPCODE_REPL_ALPHA takes none; every other opcode defers to
 * _mesa_num_inst_src_regs().
 */
static int num_pairinst_args(GLuint opcode)
{
	return (opcode == OPCODE_REPL_ALPHA) ? 0 : _mesa_num_inst_src_regs(opcode);
}
/**
 * Compute which channels of source operand 'arg' are actually read by
 * 'inst', as a bitmask of WRITEMASK_X/Y/Z/W.
 *
 * \param inst      the instruction to examine
 * \param arg       index of the source operand (must be a valid source)
 * \param dst_mask  extra mask ANDed with the destination write mask for
 *                  the component-wise opcodes
 */
static GLuint
get_src_arg_mask(const struct prog_instruction *inst,
                 GLuint arg, GLuint dst_mask)
{
   /* Channels of the destination that matter; all four unless narrowed. */
   GLuint written = WRITEMASK_XYZW;
   GLuint result = 0x0;
   GLuint c;

   ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));

   if (!inst->CondUpdate) {
      switch (inst->Opcode) {
      /* component-wise opcodes: only the written channels are read */
      case OPCODE_MOV:
      case OPCODE_MIN:
      case OPCODE_MAX:
      case OPCODE_ABS:
      case OPCODE_ADD:
      case OPCODE_MAD:
      case OPCODE_MUL:
      case OPCODE_SUB:
         written = inst->DstReg.WriteMask & dst_mask;
         break;
      /* scalar opcodes: only the first swizzle component is consumed */
      case OPCODE_RCP:
      case OPCODE_SIN:
      case OPCODE_COS:
      case OPCODE_RSQ:
      case OPCODE_POW:
      case OPCODE_EX2:
      case OPCODE_LOG:
         written = WRITEMASK_X;
         break;
      case OPCODE_DP2:
         written = WRITEMASK_XY;
         break;
      case OPCODE_DP3:
      case OPCODE_XPD:
         written = WRITEMASK_XYZ;
         break;
      default:
         /* unknown pattern: assume every channel is read */
         break;
      }
   }

   /* Map the relevant channels through the source swizzle. */
   for (c = 0; c < 4; c++) {
      const GLuint swz = GET_SWZ(inst->SrcReg[arg].Swizzle, c);
      ASSERT(swz < 4);
      if ((written & (1 << c)) == 0)
         continue;
      if (swz > SWIZZLE_W)
         continue;
      result |= 1 << swz;
   }

   return result;
}
/**
 * Scan the given program to find a free register of the given type.
 *
 * A register counts as used when any instruction references it as a source
 * operand or as its destination.  Indices outside [0, MAX_PROGRAM_TEMPS)
 * are ignored rather than indexing past the local 'used' table.
 *
 * \param prog     the program to scan
 * \param regFile  PROGRAM_INPUT, PROGRAM_OUTPUT or PROGRAM_TEMPORARY
 * \return index of the lowest unused register, or -1 if all
 *         MAX_PROGRAM_TEMPS slots are in use
 */
GLint
_mesa_find_free_register(const struct gl_program *prog, GLuint regFile)
{
   GLboolean used[MAX_PROGRAM_TEMPS];
   GLuint i, k;

   assert(regFile == PROGRAM_INPUT ||
          regFile == PROGRAM_OUTPUT ||
          regFile == PROGRAM_TEMPORARY);

   _mesa_memset(used, 0, sizeof(used));

   for (i = 0; i < prog->NumInstructions; i++) {
      const struct prog_instruction *inst = prog->Instructions + i;
      const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);

      /* A register that is only ever written is still in use. */
      if (inst->DstReg.File == regFile) {
         const GLuint dstIndex = (GLuint) inst->DstReg.Index;
         if (dstIndex < MAX_PROGRAM_TEMPS)
            used[dstIndex] = GL_TRUE;
      }

      for (k = 0; k < n; k++) {
         if (inst->SrcReg[k].File == regFile) {
            /* The unsigned cast also rejects negative indices. */
            const GLuint srcIndex = (GLuint) inst->SrcReg[k].Index;
            if (srcIndex < MAX_PROGRAM_TEMPS)
               used[srcIndex] = GL_TRUE;
         }
      }
   }

   for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
      if (!used[i])
         return i;
   }

   return -1;
}
/** * Print a single NVIDIA vertex program instruction. */ void _mesa_print_nv_vertex_instruction(const struct prog_instruction *inst) { GLuint i, n; switch (inst->Opcode) { case OPCODE_MOV: case OPCODE_LIT: case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_EXP: case OPCODE_LOG: case OPCODE_RCC: case OPCODE_ABS: case OPCODE_MUL: case OPCODE_ADD: case OPCODE_DP3: case OPCODE_DP4: case OPCODE_DST: case OPCODE_MIN: case OPCODE_MAX: case OPCODE_SLT: case OPCODE_SGE: case OPCODE_DPH: case OPCODE_SUB: case OPCODE_MAD: _mesa_printf("%s ", _mesa_opcode_string(inst->Opcode)); PrintDstReg(&inst->DstReg); _mesa_printf(", "); n = _mesa_num_inst_src_regs(inst->Opcode); for (i = 0; i < n; i++) { PrintSrcReg(&inst->SrcReg[i]); if (i + 1 < n) _mesa_printf(", "); } _mesa_printf(";\n"); break; case OPCODE_ARL: _mesa_printf("ARL A0.x, "); PrintSrcReg(&inst->SrcReg[0]); _mesa_printf(";\n"); break; case OPCODE_PRINT: _mesa_printf("PRINT '%s'", inst->Data); if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { _mesa_printf(", "); PrintSrcReg(&inst->SrcReg[0]); _mesa_printf(";\n"); } else { _mesa_printf("\n"); } break; case OPCODE_END: _mesa_printf("END\n"); break; default: _mesa_printf("BAD INSTRUCTION\n"); } }
/**
 * Populate the 'used' array with flags indicating which registers (TEMPs,
 * INPUTs, OUTPUTs, etc.) are used by the given program.
 *
 * A register is flagged when it appears as a destination or as a source
 * operand of any instruction.  Indices outside [0, usedSize) are never
 * written to the array.
 *
 * \param prog      the program to scan
 * \param file      type of register to scan for
 * \param used      returns true/false flags for in use / free
 * \param usedSize  number of elements in the 'used' array
 */
void
_mesa_find_used_registers(const struct gl_program *prog,
                          gl_register_file file,
                          GLboolean used[], GLuint usedSize)
{
   GLuint i, j;

   memset(used, 0, usedSize * sizeof(used[0]));

   for (i = 0; i < prog->NumInstructions; i++) {
      const struct prog_instruction *inst = prog->Instructions + i;
      const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);

      if (inst->DstReg.File == file) {
         assert(inst->DstReg.Index < usedSize);
         if (inst->DstReg.Index < usedSize)
            used[inst->DstReg.Index] = GL_TRUE;
      }

      for (j = 0; j < n; j++) {
         if (inst->SrcReg[j].File == file) {
            assert(inst->SrcReg[j].Index < (GLint) usedSize);
            /* The source index is compared as a signed value, so it can be
             * negative; without the lower bound such an index would write
             * before the start of 'used'.
             */
            if (inst->SrcReg[j].Index >= 0 &&
                inst->SrcReg[j].Index < (GLint) usedSize)
               used[inst->SrcReg[j].Index] = GL_TRUE;
         }
      }
   }
}
void _mesa_program_fragment_position_to_sysval(struct gl_program *prog) { GLuint i; if (prog->Target != GL_FRAGMENT_PROGRAM_ARB || !(prog->InputsRead & BITFIELD64_BIT(VARYING_SLOT_POS))) return; prog->InputsRead &= ~BITFIELD64_BIT(VARYING_SLOT_POS); prog->SystemValuesRead |= 1 << SYSTEM_VALUE_FRAG_COORD; for (i = 0; i < prog->NumInstructions; i++) { struct prog_instruction *inst = prog->Instructions + i; const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_INPUT && inst->SrcReg[j].Index == VARYING_SLOT_POS) { inst->SrcReg[j].File = PROGRAM_SYSTEM_VALUE; inst->SrcReg[j].Index = SYSTEM_VALUE_FRAG_COORD; } } } }
/**
 * Redirect every source operand of 'inst' that reads TEMP[oldindex] so
 * that it reads TEMP[newindex] instead.  Only the index is changed.
 */
static void unalias_srcregs(struct prog_instruction *inst,
			    GLuint oldindex, GLuint newindex)
{
	const int count = _mesa_num_inst_src_regs(inst->Opcode);
	int s;

	for (s = 0; s < count; ++s) {
		if (inst->SrcReg[s].File != PROGRAM_TEMPORARY)
			continue;
		if (inst->SrcReg[s].Index != oldindex)
			continue;
		inst->SrcReg[s].Index = newindex;
	}
}
/** * Scan program instructions to update the program's InputsRead and * OutputsWritten fields. */ static void _slang_update_inputs_outputs(struct gl_program *prog) { GLuint i, j; GLuint maxAddrReg = 0; prog->InputsRead = 0x0; prog->OutputsWritten = 0x0; for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_INPUT) { prog->InputsRead |= 1 << inst->SrcReg[j].Index; } else if (inst->SrcReg[j].File == PROGRAM_ADDRESS) { maxAddrReg = MAX2(maxAddrReg, (GLuint) (inst->SrcReg[j].Index + 1)); } } if (inst->DstReg.File == PROGRAM_OUTPUT) { prog->OutputsWritten |= BITFIELD64_BIT(inst->DstReg.Index); if (inst->DstReg.RelAddr) { /* If the output attribute is indexed with relative addressing * we know that it must be a varying or texcoord such as * gl_TexCoord[i] = v; In this case, mark all the texcoords * or varying outputs as being written. It's not an error if * a vertex shader writes varying vars that aren't used by the * fragment shader. But it is an error for a fragment shader * to use varyings that are not written by the vertex shader. */ if (prog->Target == GL_VERTEX_PROGRAM_ARB) { if (inst->DstReg.Index == VERT_RESULT_TEX0) { /* mark all texcoord outputs as written */ const GLbitfield64 mask = BITFIELD64_RANGE(VERT_RESULT_TEX0, (VERT_RESULT_TEX0 + MAX_TEXTURE_COORD_UNITS - 1)); prog->OutputsWritten |= mask; } else if (inst->DstReg.Index == VERT_RESULT_VAR0) { /* mark all generic varying outputs as written */ const GLbitfield64 mask = BITFIELD64_RANGE(VERT_RESULT_VAR0, (VERT_RESULT_VAR0 + MAX_VARYING - 1)); prog->OutputsWritten |= mask; } } } } else if (inst->DstReg.File == PROGRAM_ADDRESS) { maxAddrReg = MAX2(maxAddrReg, inst->DstReg.Index + 1); } } prog->NumAddressRegs = maxAddrReg; }
/** * "Post-process" a GPU program. This is intended to be used for debugging. * Example actions include no-op'ing instructions or changing instruction * behaviour. */ void _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog) { static const GLfloat white[4] = { 0.5, 0.5, 0.5, 0.5 }; GLuint i; GLuint whiteSwizzle; GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters, (gl_constant_value *) white, 4, &whiteSwizzle); (void) whiteIndex; for (i = 0; i < prog->NumInstructions; i++) { struct prog_instruction *inst = prog->Instructions + i; const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); (void) n; if (_mesa_is_tex_instruction(inst->Opcode)) { #if 0 /* replace TEX/TXP/TXB with MOV */ inst->Opcode = OPCODE_MOV; inst->DstReg.WriteMask = WRITEMASK_XYZW; inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; inst->SrcReg[0].Negate = NEGATE_NONE; #endif #if 0 /* disable shadow texture mode */ inst->TexShadow = 0; #endif } if (inst->Opcode == OPCODE_TXP) { #if 0 inst->Opcode = OPCODE_MOV; inst->DstReg.WriteMask = WRITEMASK_XYZW; inst->SrcReg[0].File = PROGRAM_CONSTANT; inst->SrcReg[0].Index = whiteIndex; inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; inst->SrcReg[0].Negate = NEGATE_NONE; #endif #if 0 inst->TexShadow = 0; #endif #if 0 inst->Opcode = OPCODE_TEX; inst->TexShadow = 0; #endif } } }
/**
 * Scan forward in program from 'start' for the next occurrences of
 * TEMP[index], looking only at the components selected by 'mask'.
 *
 * Each write to TEMP[index] clears the written components from 'mask';
 * once no component is left the value is known to be overwritten.
 *
 * \return READ  if an instruction reads one of the remaining components
 *               (or uses relative addressing on any source),
 *         WRITE if all components are overwritten before being read,
 *         FLOW  if a flow-control instruction is reached first,
 *         END   if OPCODE_END or the end of the instruction list is hit.
 */
static enum inst_use
find_next_use(const struct gl_program *prog,
              GLuint start,
              GLuint index,
              GLuint mask)
{
   GLuint i;

   for (i = start; i < prog->NumInstructions; i++) {
      const struct prog_instruction *inst = prog->Instructions + i;
      switch (inst->Opcode) {
      case OPCODE_BGNLOOP:
      case OPCODE_BGNSUB:
      case OPCODE_BRA:
      /* BRK leaves the loop just like CONT jumps within it: what follows
       * in program order is not necessarily what executes next, so the
       * linear scan must stop here as well.
       */
      case OPCODE_BRK:
      case OPCODE_CAL:
      case OPCODE_CONT:
      case OPCODE_IF:
      case OPCODE_ELSE:
      case OPCODE_ENDIF:
      case OPCODE_ENDLOOP:
      case OPCODE_ENDSUB:
      case OPCODE_RET:
         return FLOW;
      case OPCODE_END:
         return END;
      default:
         {
            const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
            GLuint j;
            for (j = 0; j < numSrc; j++) {
               /* A relatively-addressed source is treated as a read
                * regardless of its register file or index.
                */
               if (inst->SrcReg[j].RelAddr ||
                   (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
                    inst->SrcReg[j].Index == index &&
                    (get_src_arg_mask(inst, j, NO_MASK) & mask)))
                  return READ;
            }
            if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
                inst->DstReg.File == PROGRAM_TEMPORARY &&
                inst->DstReg.Index == index) {
               mask &= ~inst->DstReg.WriteMask;
               if (mask == 0)
                  return WRITE;
            }
         }
      }
   }
   return END;
}
/**
 * Search instructions for references to program parameters.  When found,
 * increment the parameter index by 'offset'.
 * Used when combining programs.
 *
 * \param inst     array of instructions to rewrite in place
 * \param numInst  number of instructions in the array
 * \param offset   amount added to each CONSTANT/UNIFORM/STATE_VAR index
 */
static void
adjust_param_indexes(struct prog_instruction *inst, GLuint numInst,
                     GLuint offset)
{
   GLuint i, j;

   for (i = 0; i < numInst; i++) {
      /* The operand count must come from the instruction being visited,
       * not from the first instruction of the array.
       */
      const GLuint numSrc = _mesa_num_inst_src_regs(inst[i].Opcode);

      for (j = 0; j < numSrc; j++) {
         const GLuint f = inst[i].SrcReg[j].File;
         if (f == PROGRAM_CONSTANT ||
             f == PROGRAM_UNIFORM ||
             f == PROGRAM_STATE_VAR) {
            inst[i].SrcReg[j].Index += offset;
         }
      }
   }
}
/**
 * Find the temporaries which are used in the given program.
 *
 * A temporary is flagged when it appears as a source operand or as the
 * destination of any instruction.  Indices outside the table are ignored.
 *
 * \param program    the program to scan
 * \param tempsUsed  returns GL_TRUE for each temporary index referenced
 */
static void
find_temporaries(const struct gl_program *program,
                 GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
{
   GLuint i, j;

   for (i = 0; i < MAX_PROGRAM_TEMPS; i++)
      tempsUsed[i] = GL_FALSE;

   for (i = 0; i < program->NumInstructions; i++) {
      const struct prog_instruction *inst = program->Instructions + i;
      const GLuint n = _mesa_num_inst_src_regs( inst->Opcode );

      for (j = 0; j < n; j++) {
         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
            /* The unsigned cast also rejects negative indices. */
            const GLuint srcIndex = (GLuint) inst->SrcReg[j].Index;
            if (srcIndex < MAX_PROGRAM_TEMPS)
               tempsUsed[srcIndex] = GL_TRUE;
         }
      }

      /* Checked once per instruction and independently of the source
       * count, so a destination is not missed when there are no sources.
       */
      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
         const GLuint dstIndex = (GLuint) inst->DstReg.Index;
         if (dstIndex < MAX_PROGRAM_TEMPS)
            tempsUsed[dstIndex] = GL_TRUE;
      }
   }
}
/**
 * Return the number of source arguments for the given opcode.
 * The WM_* opcodes listed below have fixed counts; any other opcode must
 * be below MAX_OPCODE and is looked up with _mesa_num_inst_src_regs().
 */
GLuint brw_wm_nr_args( GLuint opcode )
{
   GLuint count;

   switch (opcode) {
   case WM_FRONTFACING:
   case WM_PIXELXY:
      count = 0;
      break;
   case WM_CINTERP:
   case WM_WPOSXY:
   case WM_DELTAXY:
      count = 1;
      break;
   case WM_LINTERP:
   case WM_PIXELW:
      count = 2;
      break;
   case WM_FB_WRITE:
   case WM_PINTERP:
      count = 3;
      break;
   default:
      assert(opcode < MAX_OPCODE);
      count = _mesa_num_inst_src_regs(opcode);
      break;
   }

   return count;
}
/** * Complements dead_code_global. Try to remove code in block of code by * carefully monitoring the swizzles. Both functions should be merged into one * with a proper control flow graph */ static GLboolean _mesa_remove_dead_code_local(struct gl_program *prog) { GLboolean *removeInst; GLuint i, arg, rem = 0; removeInst = (GLboolean *) calloc(1, prog->NumInstructions * sizeof(GLboolean)); for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; const GLuint index = inst->DstReg.Index; const GLuint mask = inst->DstReg.WriteMask; enum inst_use use; /* We must deactivate the pass as soon as some indirection is used */ if (inst->DstReg.RelAddr) goto done; for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) if (inst->SrcReg[arg].RelAddr) goto done; if (_mesa_is_flow_control_opcode(inst->Opcode) || _mesa_num_inst_dst_regs(inst->Opcode) == 0 || inst->DstReg.File != PROGRAM_TEMPORARY || inst->DstReg.RelAddr) continue; use = find_next_use(prog, i+1, index, mask); if (use == WRITE || use == END) removeInst[i] = GL_TRUE; } rem = remove_instructions(prog, removeInst); done: free(removeInst); return rem != 0; }
/**
 * Search instructions for registers that match (oldFile, oldIndex),
 * replacing them with (newFile, newIndex).  Both source operands and the
 * destination register of each instruction are examined.
 *
 * \param inst     array of instructions to rewrite in place
 * \param numInst  number of instructions in the array
 */
static void
replace_registers(struct prog_instruction *inst, GLuint numInst,
                  GLuint oldFile, GLuint oldIndex,
                  GLuint newFile, GLuint newIndex)
{
   GLuint i, j;

   for (i = 0; i < numInst; i++) {
      /* The operand count must come from the instruction being visited,
       * not from the first instruction of the array.
       */
      const GLuint numSrc = _mesa_num_inst_src_regs(inst[i].Opcode);

      /* src regs */
      for (j = 0; j < numSrc; j++) {
         if (inst[i].SrcReg[j].File == oldFile &&
             inst[i].SrcReg[j].Index == oldIndex) {
            inst[i].SrcReg[j].File = newFile;
            inst[i].SrcReg[j].Index = newIndex;
         }
      }

      /* dst reg */
      if (inst[i].DstReg.File == oldFile && inst[i].DstReg.Index == oldIndex) {
         inst[i].DstReg.File = newFile;
         inst[i].DstReg.Index = newIndex;
      }
   }
}
/** * Scan program instructions to update the program's NumTemporaries field. * Note: this implemenation relies on the code generator allocating * temps in increasing order (0, 1, 2, ... ). */ static void _slang_count_temporaries(struct gl_program *prog) { GLuint i, j; GLint maxIndex = -1; for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { if (maxIndex < inst->SrcReg[j].Index) maxIndex = inst->SrcReg[j].Index; } if (inst->DstReg.File == PROGRAM_TEMPORARY) { if (maxIndex < (GLint) inst->DstReg.Index) maxIndex = inst->DstReg.Index; } } } prog->NumTemporaries = (GLuint) (maxIndex + 1); }
/**
 * Renumber registers of one register file using a lookup table.
 * \param prog  the program to search/replace
 * \param file  the type of register file to search/replace
 * \param map  maps old register indexes to new indexes; entries that are
 *             looked up are asserted to be non-negative
 */
static void
replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
{
   GLuint pc;

   for (pc = 0; pc < prog->NumInstructions; pc++) {
      struct prog_instruction *cur = &prog->Instructions[pc];
      const GLuint nsrc = _mesa_num_inst_src_regs(cur->Opcode);
      GLuint s;

      /* source operands */
      for (s = 0; s < nsrc; s++) {
         GLuint old;

         if (cur->SrcReg[s].File != file)
            continue;

         old = cur->SrcReg[s].Index;
         ASSERT(map[old] >= 0);
         cur->SrcReg[s].Index = map[old];
      }

      /* destination */
      if (cur->DstReg.File == file) {
         const GLuint old = cur->DstReg.Index;
         ASSERT(map[old] >= 0);
         cur->DstReg.Index = map[old];
      }
   }
}
/* * TODO: consider moving this into core */ static void calc_live_regs( struct i915_fragment_program *p ) { const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; GLuint regsUsed = 0xffff0000; GLint i; for (i = program->Base.NumInstructions - 1; i >= 0; i--) { struct prog_instruction *inst = &program->Base.Instructions[i]; int opArgs = _mesa_num_inst_src_regs(inst->Opcode); int a; /* Register is written to: unmark as live for this and preceeding ops */ if (inst->DstReg.File == PROGRAM_TEMPORARY) regsUsed &= ~(1 << inst->DstReg.Index); for (a = 0; a < opArgs; a++) { /* Register is read from: mark as live for this and preceeding ops */ if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) regsUsed |= 1 << inst->SrcReg[a].Index; } p->usedRegs[i] = regsUsed; } }
/**
 * Translate one Mesa program instruction into the ureg program held by
 * the translation context 't'.
 *
 * Destination and source operands are translated first.  Opcodes that
 * need special handling emit their instruction and return; SCS and XPD
 * only narrow the destination write mask and then share the generic
 * emission at the bottom with all remaining opcodes.
 */
static void
compile_instruction(
   struct gl_context *ctx,
   struct st_translate *t,
   const struct prog_instruction *inst,
   boolean clamp_dst_color_output)
{
   struct ureg_program *ureg = t->ureg;
   struct ureg_dst dst[1] = { { 0 } };
   struct ureg_src src[4];
   unsigned num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
   unsigned num_src = _mesa_num_inst_src_regs( inst->Opcode );
   GLuint s;

   if (num_dst != 0) {
      dst[0] = translate_dst( t,
                              &inst->DstReg,
                              inst->Saturate,
                              clamp_dst_color_output);
   }

   for (s = 0; s < num_src; s++) {
      src[s] = translate_src( t, &inst->SrcReg[s] );
   }

   switch( inst->Opcode ) {
   case OPCODE_SWZ:
      emit_swz( t, dst[0], &inst->SrcReg[0] );
      return;

   case OPCODE_BGNLOOP:
   case OPCODE_CAL:
   case OPCODE_ELSE:
   case OPCODE_ENDLOOP:
      /* labelled instructions: carry the branch target */
      debug_assert(num_dst == 0);
      ureg_label_insn( ureg,
                       translate_opcode( inst->Opcode ),
                       src, num_src,
                       get_label( t, inst->BranchTarget ));
      return;

   case OPCODE_IF:
      debug_assert(num_dst == 0);
      ureg_label_insn( ureg,
                       ctx->Const.NativeIntegers ? TGSI_OPCODE_UIF
                                                 : TGSI_OPCODE_IF,
                       src, num_src,
                       get_label( t, inst->BranchTarget ));
      return;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
      /* the sampler is appended as one extra source operand */
      src[num_src++] = t->samplers[inst->TexSrcUnit];
      ureg_tex_insn( ureg,
                     translate_opcode( inst->Opcode ),
                     dst, num_dst,
                     st_translate_texture_target( inst->TexSrcTarget,
                                                  inst->TexShadow ),
                     NULL, 0,
                     src, num_src );
      return;

   case OPCODE_NOISE1:
   case OPCODE_NOISE2:
   case OPCODE_NOISE3:
   case OPCODE_NOISE4:
      /* At some point, a motivated person could add a better
       * implementation of noise.  Currently not even the nvidia
       * binary drivers do anything more than this.  In any case, the
       * place to do this is in the GL state tracker, not the poor
       * driver.
       */
      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
      return;

   case OPCODE_DDY:
      emit_ddy( t, dst[0], &inst->SrcReg[0] );
      return;

   case OPCODE_RSQ:
      /* RSQ is emitted on the absolute value of its operand */
      ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) );
      return;

   case OPCODE_SCS:
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
      break;

   case OPCODE_XPD:
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
      break;

   default:
      break;
   }

   /* generic emission shared by SCS, XPD and all remaining opcodes */
   ureg_insn( ureg,
              translate_opcode( inst->Opcode ),
              dst, num_dst,
              src, num_src );
}
/**
 * Scan/rewrite program to remove reads of custom (output) registers.
 * The passed type has to be PROGRAM_OUTPUT.
 * On some hardware, trying to read an output register causes trouble.
 * So, rewrite the program to use a temporary register in this case.
 *
 * Three steps: (1) every read of an output is redirected to a temporary
 * allocated for that output, (2) every write to such an output is
 * redirected to the same temporary, (3) MOVs copying the temporaries to
 * the real outputs are inserted in front of the first END instruction.
 */
void
_mesa_remove_output_reads(struct gl_program *prog, gl_register_file type)
{
   GLint outputMap[VARYING_SLOT_MAX];
   GLboolean usedTemps[MAX_PROGRAM_TEMPS];
   GLuint numVaryingReads = 0;
   GLuint firstTemp = 0;
   GLuint i;

   _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
                             usedTemps, MAX_PROGRAM_TEMPS);

   assert(type == PROGRAM_OUTPUT);

   /* -1 means "no temporary assigned to this output yet" */
   for (i = 0; i < VARYING_SLOT_MAX; i++)
      outputMap[i] = -1;

   /* step 1: look for instructions which read from varying vars */
   for (i = 0; i < prog->NumInstructions; i++) {
      struct prog_instruction *cur = &prog->Instructions[i];
      const GLuint nsrc = _mesa_num_inst_src_regs(cur->Opcode);
      GLuint s;

      for (s = 0; s < nsrc; s++) {
         GLuint slot;

         if (cur->SrcReg[s].File != type)
            continue;

         /* replace the read with a temp reg */
         slot = cur->SrcReg[s].Index;
         if (outputMap[slot] == -1) {
            numVaryingReads++;
            outputMap[slot] = _mesa_find_free_register(usedTemps,
                                                       MAX_PROGRAM_TEMPS,
                                                       firstTemp);
            firstTemp = outputMap[slot] + 1;
         }
         cur->SrcReg[s].File = PROGRAM_TEMPORARY;
         cur->SrcReg[s].Index = outputMap[slot];
      }
   }

   if (numVaryingReads == 0)
      return; /* nothing to be done */

   /* step 2: redirect writes to the varying vars identified above */
   for (i = 0; i < prog->NumInstructions; i++) {
      struct prog_instruction *cur = &prog->Instructions[i];

      if (cur->DstReg.File != type)
         continue;
      if (outputMap[cur->DstReg.Index] < 0)
         continue;

      /* change inst to write to the temp reg, instead of the varying */
      cur->DstReg.File = PROGRAM_TEMPORARY;
      cur->DstReg.Index = outputMap[cur->DstReg.Index];
   }

   /* step 3: insert new instructions to copy the temp vars to the
    * varying vars
    */
   {
      struct prog_instruction *mov;
      GLint endPos = -1;
      GLint var;

      /* Look for END instruction and insert the new varying writes */
      for (i = 0; i < prog->NumInstructions; i++) {
         if (prog->Instructions[i].Opcode == OPCODE_END) {
            endPos = i;
            _mesa_insert_instructions(prog, i, numVaryingReads);
            break;
         }
      }

      assert(endPos >= 0);

      /* insert new MOV instructions here; the pointer is taken after the
       * insertion above
       */
      mov = prog->Instructions + endPos;
      for (var = 0; var < VARYING_SLOT_MAX; var++) {
         if (outputMap[var] < 0)
            continue;

         /* MOV VAR[var], TEMP[tmp]; */
         mov->Opcode = OPCODE_MOV;
         mov->DstReg.File = type;
         mov->DstReg.Index = var;
         mov->SrcReg[0].File = PROGRAM_TEMPORARY;
         mov->SrcReg[0].Index = outputMap[var];
         mov++;
      }
   }
}
/**
 * Count which (input, temporary) register is read and written how often,
 * and scan the instruction stream to find dependencies.
 *
 * Walks s->Program->Instructions up to (not including) OPCODE_END, keeping
 * s->Instructions in step.  For each instruction it:
 *  - calls final_rewrite() and classify_instruction(),
 *  - bumps RefCount on every register get_register() resolves,
 *  - for temporary sources, records a reader on the current value of each
 *    swizzled component and counts one dependency per record,
 *  - for temporary destinations, creates a new value per written component
 *    and counts a dependency on the value it replaces, if any,
 *  - hands the instruction to instruction_ready() when it ends up with no
 *    dependencies.
 * Finally the per-temporary Value pointers are reset to 0.
 */
static void scan_instructions(struct pair_state *s)
{
	struct prog_instruction *inst;
	struct pair_state_instruction *pairinst;
	GLuint ip;

	for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
	    inst->Opcode != OPCODE_END;
	    ++inst, ++pairinst, ++ip) {
		final_rewrite(s, inst);
		classify_instruction(s, inst, pairinst);

		/* Source operands: reference counts and read dependencies. */
		int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
		int j;
		for(j = 0; j < nsrc; j++) {
			struct pair_register_translation *t =
				get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
			if (!t)
				continue;

			t->RefCount++;

			if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
				int i;
				for(i = 0; i < 4; ++i) {
					GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
					if (swz >= 4)
						continue; /* constant or NIL swizzle */
					if (!t->Value[swz])
						continue; /* this is an undefined read */

					/* Do not add a dependency if this instruction
					 * also rewrites the value. The code below adds
					 * a dependency for the DstReg, which is a superset
					 * of the SrcReg dependency. */
					if (inst->DstReg.File == PROGRAM_TEMPORARY &&
					    inst->DstReg.Index == inst->SrcReg[j].Index &&
					    GET_BIT(inst->DstReg.WriteMask, swz))
						continue;

					/* Take the next free reader record and push it on
					 * the front of the value's reader list.
					 * NOTE(review): no bounds check on ReaderPool here;
					 * presumably it is sized by the caller - confirm. */
					struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
					pairinst->NumDependencies++;
					t->Value[swz]->NumReaders++;
					r->IP = ip;
					r->Next = t->Value[swz]->Readers;
					t->Value[swz]->Readers = r;
				}
			}
		}

		/* Destination operand: new values and write-after dependencies. */
		int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
		if (ndst) {
			struct pair_register_translation *t =
				get_register(s, inst->DstReg.File, inst->DstReg.Index);
			if (t) {
				t->RefCount++;

				if (inst->DstReg.File == PROGRAM_TEMPORARY) {
					int j;
					for(j = 0; j < 4; ++j) {
						if (!GET_BIT(inst->DstReg.WriteMask, j))
							continue;

						/* NOTE(review): no bounds check on ValuePool
						 * here either - confirm sizing at the caller. */
						struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
						v->IP = ip;
						if (t->Value[j]) {
							/* This write depends on the previous value;
							 * chain the new value after it. */
							pairinst->NumDependencies++;
							t->Value[j]->Next = v;
						}
						t->Value[j] = v;
						pairinst->Values[j] = v;
					}
				}
			}
		}

		if (s->Verbose)
			_mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);

		if (!pairinst->NumDependencies)
			instruction_ready(s, ip);
	}

	/* Clear the PROGRAM_TEMPORARY state */
	int i, j;
	for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
		for(j = 0; j < 4; ++j)
			s->Temps[i].Value[j] = 0;
	}
}
/**
 * Update the dependency tracking state based on what the instruction
 * at the given IP does.
 *
 * For a temporary destination, each written component becomes the current
 * value of the register, and the instructions waiting on it (its readers,
 * or the next writer when there are no readers) get one dependency
 * released through decrement_dependencies().  For each source operand the
 * hardware register reference is dropped with deref_hw_reg(); for
 * temporaries the reader count of each value read is decremented, and when
 * it reaches zero the next writer of that value is released.
 *
 * \param s   pairing state
 * \param ip  index of the instruction being committed
 */
static void commit_instruction(struct pair_state *s, int ip)
{
	struct prog_instruction *inst = s->Program->Instructions + ip;
	struct pair_state_instruction *pairinst = s->Instructions + ip;

	if (s->Verbose)
		_mesa_printf("commit_instruction(%i)\n", ip);

	/* Destination: publish the new values and wake up dependents. */
	if (inst->DstReg.File == PROGRAM_TEMPORARY) {
		struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
		deref_hw_reg(s, t->HwIndex);

		int i;
		for(i = 0; i < 4; ++i) {
			if (!GET_BIT(inst->DstReg.WriteMask, i))
				continue;

			t->Value[i] = pairinst->Values[i];
			if (t->Value[i]->NumReaders) {
				/* Every recorded reader loses one dependency. */
				struct reg_value_reader *r;
				for(r = pairinst->Values[i]->Readers; r; r = r->Next)
					decrement_dependencies(s, r->IP);
			} else if (t->Value[i]->Next) {
				/* This happens when the only reader writes
				 * the register at the same time */
				decrement_dependencies(s, t->Value[i]->Next->IP);
			}
		}
	}

	/* Sources: drop register references and reader counts. */
	int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
	int i;
	for(i = 0; i < nsrc; i++) {
		struct pair_register_translation *t =
			get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index);
		if (!t)
			continue;

		deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index));

		/* Per-value reader tracking only exists for temporaries. */
		if (inst->SrcReg[i].File != PROGRAM_TEMPORARY)
			continue;

		int j;
		for(j = 0; j < 4; ++j) {
			GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
			if (swz >= 4)
				continue;
			if (!t->Value[swz])
				continue;

			/* Do not free a dependency if this instruction
			 * also rewrites the value. See scan_instructions. */
			if (inst->DstReg.File == PROGRAM_TEMPORARY &&
			    inst->DstReg.Index == inst->SrcReg[i].Index &&
			    GET_BIT(inst->DstReg.WriteMask, swz))
				continue;

			/* Last reader gone: the next writer of this value, if
			 * any, loses one dependency. */
			if (!--t->Value[swz]->NumReaders) {
				if (t->Value[swz]->Next)
					decrement_dependencies(s, t->Value[swz]->Next->IP);
			}
		}
	}
}
/** * Fill the given ALU instruction's opcodes and source operands into the given pair, * if possible. */ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) { struct pair_state_instruction *pairinst = s->Instructions + ip; struct prog_instruction *inst = s->Program->Instructions + ip; ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); if (pairinst->NeedRGB) { if (pairinst->IsTranscendent) pair->RGB.Opcode = OPCODE_REPL_ALPHA; else pair->RGB.Opcode = inst->Opcode; if (inst->SaturateMode == SATURATE_ZERO_ONE) pair->RGB.Saturate = 1; } if (pairinst->NeedAlpha) { pair->Alpha.Opcode = inst->Opcode; if (inst->SaturateMode == SATURATE_ZERO_ONE) pair->Alpha.Saturate = 1; } int nargs = _mesa_num_inst_src_regs(inst->Opcode); int i; /* Special case for DDX/DDY (MDH/MDV). */ if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) return GL_FALSE; else nargs++; } for(i = 0; i < nargs; ++i) { int source; if (pairinst->NeedRGB && !pairinst->IsTranscendent) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; GLuint negatebase = 0; int j; for(j = 0; j < 3; ++j) { GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); if (swz < 3) srcrgb = GL_TRUE; else if (swz < 4) srcalpha = GL_TRUE; if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) negatebase = 1; } source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) return GL_FALSE; pair->RGB.Arg[i].Source = source; pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; } if (pairinst->NeedAlpha) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 
0 : 3); GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); if (swz < 3) srcrgb = GL_TRUE; else if (swz < 4) srcalpha = GL_TRUE; source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) return GL_FALSE; pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = swz; pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; } } return GL_TRUE; }
/**
 * Print a single vertex/fragment program instruction.
 *
 * \param f       stream to print to
 * \param inst    the instruction to print
 * \param indent  current indentation, in spaces
 * \param mode    print mode, passed on to the operand printers
 * \param prog    the program the instruction belongs to
 * \return the indentation to use for the next instruction: block-closing
 *         opcodes (ELSE/ENDIF/ENDLOOP/ENDSUB) first reduce it by 3,
 *         block-opening opcodes (IF/ELSE/BGNLOOP/BGNSUB) return it
 *         increased by 3
 */
GLint
_mesa_fprint_instruction_opt(FILE *f,
                            const struct prog_instruction *inst,
                            GLint indent,
                            gl_prog_print_mode mode,
                            const struct gl_program *prog)
{
   GLint next_indent;
   GLint col;

   /* block-closing opcodes are printed one level shallower */
   switch (inst->Opcode) {
   case OPCODE_ELSE:
   case OPCODE_ENDIF:
   case OPCODE_ENDLOOP:
   case OPCODE_ENDSUB:
      indent -= 3;
      break;
   default:
      break;
   }
   next_indent = indent;

   for (col = 0; col < indent; col++) {
      fprintf(f, " ");
   }

   switch (inst->Opcode) {
   case OPCODE_SWZ:
      fprintf(f, "SWZ");
      if (inst->Saturate)
         fprintf(f, "_SAT");
      fprintf(f, " ");
      fprint_dst_reg(f, &inst->DstReg, mode, prog);
      fprintf(f, ", %s[%d], %s",
              _mesa_register_file_name((gl_register_file) inst->SrcReg[0].File),
              inst->SrcReg[0].Index,
              _mesa_swizzle_string(inst->SrcReg[0].Swizzle,
                                   inst->SrcReg[0].Negate, GL_TRUE));
      fprint_comment(f, inst);
      break;

   case OPCODE_TEX:
   case OPCODE_TXP:
   case OPCODE_TXL:
   case OPCODE_TXB:
   case OPCODE_TXD:
      {
         const char *target = NULL;

         fprintf(f, "%s", _mesa_opcode_string(inst->Opcode));
         if (inst->Saturate)
            fprintf(f, "_SAT");
         fprintf(f, " ");
         fprint_dst_reg(f, &inst->DstReg, mode, prog);
         fprintf(f, ", ");
         fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
         if (inst->Opcode == OPCODE_TXD) {
            /* TXD carries two more source operands */
            fprintf(f, ", ");
            fprint_src_reg(f, &inst->SrcReg[1], mode, prog);
            fprintf(f, ", ");
            fprint_src_reg(f, &inst->SrcReg[2], mode, prog);
         }
         fprintf(f, ", texture[%d], ", inst->TexSrcUnit);

         switch (inst->TexSrcTarget) {
         case TEXTURE_1D_INDEX:       target = "1D";       break;
         case TEXTURE_2D_INDEX:       target = "2D";       break;
         case TEXTURE_3D_INDEX:       target = "3D";       break;
         case TEXTURE_CUBE_INDEX:     target = "CUBE";     break;
         case TEXTURE_RECT_INDEX:     target = "RECT";     break;
         case TEXTURE_1D_ARRAY_INDEX: target = "1D_ARRAY"; break;
         case TEXTURE_2D_ARRAY_INDEX: target = "2D_ARRAY"; break;
         default:
            /* unknown target: print nothing */
            break;
         }
         if (target)
            fprintf(f, "%s", target);

         if (inst->TexShadow)
            fprintf(f, " SHADOW");
         fprint_comment(f, inst);
      }
      break;

   case OPCODE_KIL:
      fprintf(f, "%s", _mesa_opcode_string(inst->Opcode));
      fprintf(f, " ");
      fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
      fprint_comment(f, inst);
      break;

   case OPCODE_ARL:
      fprintf(f, "ARL ");
      fprint_dst_reg(f, &inst->DstReg, mode, prog);
      fprintf(f, ", ");
      fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
      fprint_comment(f, inst);
      break;

   case OPCODE_IF:
      fprintf(f, "IF ");
      fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
      fprintf(f, "; ");
      fprintf(f, " # (if false, goto %d)", inst->BranchTarget);
      fprint_comment(f, inst);
      next_indent = indent + 3;
      break;

   case OPCODE_ELSE:
      fprintf(f, "ELSE; # (goto %d)\n", inst->BranchTarget);
      next_indent = indent + 3;
      break;

   case OPCODE_ENDIF:
      fprintf(f, "ENDIF;\n");
      break;

   case OPCODE_BGNLOOP:
      fprintf(f, "BGNLOOP; # (end at %d)\n", inst->BranchTarget);
      next_indent = indent + 3;
      break;

   case OPCODE_ENDLOOP:
      fprintf(f, "ENDLOOP; # (goto %d)\n", inst->BranchTarget);
      break;

   case OPCODE_BRK:
   case OPCODE_CONT:
      fprintf(f, "%s; # (goto %d)",
              _mesa_opcode_string(inst->Opcode),
              inst->BranchTarget);
      fprint_comment(f, inst);
      break;

   case OPCODE_BGNSUB:
      fprintf(f, "BGNSUB");
      fprint_comment(f, inst);
      next_indent = indent + 3;
      break;

   case OPCODE_ENDSUB:
      /* only shown in debug mode */
      if (mode == PROG_PRINT_DEBUG) {
         fprintf(f, "ENDSUB");
         fprint_comment(f, inst);
      }
      break;

   case OPCODE_CAL:
      fprintf(f, "CAL %u", inst->BranchTarget);
      fprint_comment(f, inst);
      break;

   case OPCODE_RET:
      fprintf(f, "RET");
      fprint_comment(f, inst);
      break;

   case OPCODE_END:
      fprintf(f, "END\n");
      break;

   case OPCODE_NOP:
      if (mode == PROG_PRINT_DEBUG) {
         fprintf(f, "NOP");
         fprint_comment(f, inst);
      }
      else if (inst->Comment) {
         /* ARB/NV extensions don't have NOP instruction */
         fprintf(f, "# %s\n", inst->Comment);
      }
      break;

   /* XXX may need other special-case instructions */
   default:
      if (inst->Opcode < MAX_OPCODE) {
         /* typical alu instruction */
         _mesa_fprint_alu_instruction(f, inst,
                                      _mesa_opcode_string(inst->Opcode),
                                      _mesa_num_inst_src_regs(inst->Opcode),
                                      mode, prog);
      }
      else {
         /* opcode outside the table: print it with three operands */
         _mesa_fprint_alu_instruction(f, inst,
                                      _mesa_opcode_string(inst->Opcode),
                                      3/*_mesa_num_inst_src_regs(inst->Opcode)*/,
                                      mode, prog);
      }
      break;
   }

   return next_indent;
}
/**
 * Print a single vertex/fragment program instruction.
 *
 * The instruction is written to \p f preceded by \p indent spaces.
 * Opcodes that close a nested region (ELSE, ENDIF, ENDLOOP, ENDSUB) are
 * out-dented by three columns before being printed.
 *
 * \param f       stream to print to
 * \param inst    instruction to print
 * \param indent  current indentation, in spaces
 * \param mode    print dialect; PROG_PRINT_NV and PROG_PRINT_DEBUG select
 *                special handling for BGNSUB/CAL and ENDSUB/NOP respectively
 * \param prog    program the instruction belongs to; only forwarded to the
 *                register/ALU printing helpers
 * \return the indentation to use for the next instruction: three columns
 *         deeper after IF, ELSE, BGNLOOP and (outside NV mode) BGNSUB,
 *         otherwise the (possibly out-dented) current indentation
 */
static GLint
_mesa_fprint_instruction_opt(FILE *f,
                             const struct prog_instruction *inst,
                             GLint indent,
                             gl_prog_print_mode mode,
                             const struct gl_program *prog)
{
   GLint i;

   /* Block-closing opcodes line up with the opcode that opened the block. */
   if (inst->Opcode == OPCODE_ELSE ||
       inst->Opcode == OPCODE_ENDIF ||
       inst->Opcode == OPCODE_ENDLOOP ||
       inst->Opcode == OPCODE_ENDSUB) {
      indent -= 3;
   }
   for (i = 0; i < indent; i++) {
      _mesa_fprintf(f, " ");
   }

   switch (inst->Opcode) {
   case OPCODE_PRINT:
      _mesa_fprintf(f, "PRINT '%s'", inst->Data);
      if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
         _mesa_fprintf(f, ", ");
         _mesa_fprintf(f, "%s[%d]%s",
                       file_string((gl_register_file) inst->SrcReg[0].File,
                                   mode),
                       inst->SrcReg[0].Index,
                       _mesa_swizzle_string(inst->SrcReg[0].Swizzle,
                                            inst->SrcReg[0].Negate, GL_FALSE));
      }
      /* The comment is left to fprint_comment(), like every other case in
       * this switch; printing it here as well duplicated it in the output.
       * NOTE(review): assumes fprint_comment() emits inst->Comment and the
       * line terminator - confirm against its definition.
       */
      fprint_comment(f, inst);
      break;
   case OPCODE_SWZ:
      _mesa_fprintf(f, "SWZ");
      if (inst->SaturateMode == SATURATE_ZERO_ONE)
         _mesa_fprintf(f, "_SAT");
      _mesa_fprintf(f, " ");
      fprint_dst_reg(f, &inst->DstReg, mode, prog);
      _mesa_fprintf(f, ", %s[%d], %s",
                    file_string((gl_register_file) inst->SrcReg[0].File,
                                mode),
                    inst->SrcReg[0].Index,
                    _mesa_swizzle_string(inst->SrcReg[0].Swizzle,
                                         inst->SrcReg[0].Negate, GL_TRUE));
      fprint_comment(f, inst);
      break;
   case OPCODE_TEX:
   case OPCODE_TXP:
   case OPCODE_TXL:
   case OPCODE_TXB:
      _mesa_fprintf(f, "%s", _mesa_opcode_string(inst->Opcode));
      if (inst->SaturateMode == SATURATE_ZERO_ONE)
         _mesa_fprintf(f, "_SAT");
      _mesa_fprintf(f, " ");
      fprint_dst_reg(f, &inst->DstReg, mode, prog);
      _mesa_fprintf(f, ", ");
      fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
      _mesa_fprintf(f, ", texture[%d], ", inst->TexSrcUnit);
      switch (inst->TexSrcTarget) {
      case TEXTURE_1D_INDEX:
         _mesa_fprintf(f, "1D");
         break;
      case TEXTURE_2D_INDEX:
         _mesa_fprintf(f, "2D");
         break;
      case TEXTURE_3D_INDEX:
         _mesa_fprintf(f, "3D");
         break;
      case TEXTURE_CUBE_INDEX:
         _mesa_fprintf(f, "CUBE");
         break;
      case TEXTURE_RECT_INDEX:
         _mesa_fprintf(f, "RECT");
         break;
      default:
         /* any other target: no target name is printed */
         ;
      }
      if (inst->TexShadow)
         _mesa_fprintf(f, " SHADOW");
      fprint_comment(f, inst);
      break;

   case OPCODE_KIL:
      _mesa_fprintf(f, "%s", _mesa_opcode_string(inst->Opcode));
      _mesa_fprintf(f, " ");
      fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
      fprint_comment(f, inst);
      break;
   case OPCODE_KIL_NV:
      _mesa_fprintf(f, "%s", _mesa_opcode_string(inst->Opcode));
      _mesa_fprintf(f, " ");
      _mesa_fprintf(f, "%s.%s",
                    _mesa_condcode_string(inst->DstReg.CondMask),
                    _mesa_swizzle_string(inst->DstReg.CondSwizzle,
                                         GL_FALSE, GL_FALSE));
      fprint_comment(f, inst);
      break;

   case OPCODE_ARL:
      _mesa_fprintf(f, "ARL ");
      fprint_dst_reg(f, &inst->DstReg, mode, prog);
      _mesa_fprintf(f, ", ");
      fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
      fprint_comment(f, inst);
      break;
   case OPCODE_BRA:
      _mesa_fprintf(f, "BRA %d (%s%s)",
                    inst->BranchTarget,
                    _mesa_condcode_string(inst->DstReg.CondMask),
                    _mesa_swizzle_string(inst->DstReg.CondSwizzle,
                                         0, GL_FALSE));
      fprint_comment(f, inst);
      break;
   case OPCODE_IF:
      if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
         /* Use ordinary register */
         _mesa_fprintf(f, "IF ");
         fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
         _mesa_fprintf(f, "; ");
      }
      else {
         /* Use cond codes */
         _mesa_fprintf(f, "IF (%s%s);",
                       _mesa_condcode_string(inst->DstReg.CondMask),
                       _mesa_swizzle_string(inst->DstReg.CondSwizzle,
                                            0, GL_FALSE));
      }
      _mesa_fprintf(f, " # (if false, goto %d)", inst->BranchTarget);
      fprint_comment(f, inst);
      return indent + 3;
   case OPCODE_ELSE:
      _mesa_fprintf(f, "ELSE; # (goto %d)\n", inst->BranchTarget);
      /* indent was out-dented above, so this restores the IF body depth */
      return indent + 3;
   case OPCODE_ENDIF:
      _mesa_fprintf(f, "ENDIF;\n");
      break;
   case OPCODE_BGNLOOP:
      _mesa_fprintf(f, "BGNLOOP; # (end at %d)\n", inst->BranchTarget);
      return indent + 3;
   case OPCODE_ENDLOOP:
      _mesa_fprintf(f, "ENDLOOP; # (goto %d)\n", inst->BranchTarget);
      break;
   case OPCODE_BRK:
   case OPCODE_CONT:
      _mesa_fprintf(f, "%s (%s%s); # (goto %d)",
                    _mesa_opcode_string(inst->Opcode),
                    _mesa_condcode_string(inst->DstReg.CondMask),
                    _mesa_swizzle_string(inst->DstReg.CondSwizzle,
                                         0, GL_FALSE),
                    inst->BranchTarget);
      fprint_comment(f, inst);
      break;

   case OPCODE_BGNSUB:
      if (mode == PROG_PRINT_NV) {
         /* NOTE(review): Comment is passed to %s unchecked here and in the
          * NV form of CAL below; presumably it is always set to the
          * subroutine label in this mode - confirm.
          */
         _mesa_fprintf(f, "%s:\n", inst->Comment); /* comment is label */
         return indent;
      }
      else {
         _mesa_fprintf(f, "BGNSUB");
         fprint_comment(f, inst);
         return indent + 3;
      }
   case OPCODE_ENDSUB:
      /* Only printed in debug mode */
      if (mode == PROG_PRINT_DEBUG) {
         _mesa_fprintf(f, "ENDSUB");
         fprint_comment(f, inst);
      }
      break;
   case OPCODE_CAL:
      if (mode == PROG_PRINT_NV) {
         _mesa_fprintf(f, "CAL %s; # (goto %d)\n",
                       inst->Comment, inst->BranchTarget);
      }
      else {
         _mesa_fprintf(f, "CAL %u", inst->BranchTarget);
         fprint_comment(f, inst);
      }
      break;
   case OPCODE_RET:
      _mesa_fprintf(f, "RET (%s%s)",
                    _mesa_condcode_string(inst->DstReg.CondMask),
                    _mesa_swizzle_string(inst->DstReg.CondSwizzle,
                                         0, GL_FALSE));
      fprint_comment(f, inst);
      break;

   case OPCODE_END:
      _mesa_fprintf(f, "END\n");
      break;
   case OPCODE_NOP:
      if (mode == PROG_PRINT_DEBUG) {
         _mesa_fprintf(f, "NOP");
         fprint_comment(f, inst);
      }
      else if (inst->Comment) {
         /* ARB/NV extensions don't have NOP instruction */
         _mesa_fprintf(f, "# %s\n", inst->Comment);
      }
      break;
   /* XXX may need other special-case instructions */
   default:
      if (inst->Opcode < MAX_OPCODE) {
         /* typical alu instruction */
         fprint_alu_instruction(f, inst,
                                _mesa_opcode_string(inst->Opcode),
                                _mesa_num_inst_src_regs(inst->Opcode),
                                mode, prog);
      }
      else {
         _mesa_fprintf(f, "Other opcode %d\n", inst->Opcode);
      }
      break;
   }
   return indent;
}
/** * Try to inject the destination of mov as the destination of inst and recompute * the swizzles operators for the sources of inst if required. Return GL_TRUE * of the substitution was possible, GL_FALSE otherwise */ static GLboolean _mesa_merge_mov_into_inst(struct prog_instruction *inst, const struct prog_instruction *mov) { /* Indirection table which associates destination and source components for * the mov instruction */ const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK); /* Some components are not written by inst. We cannot remove the mov */ if (mask != (inst->DstReg.WriteMask & mask)) return GL_FALSE; inst->SaturateMode |= mov->SaturateMode; /* Depending on the instruction, we may need to recompute the swizzles. * Also, some other instructions (like TEX) are not linear. We will only * consider completely active sources and destinations */ switch (inst->Opcode) { /* Carstesian instructions: we compute the swizzle */ case OPCODE_MOV: case OPCODE_MIN: case OPCODE_MAX: case OPCODE_ABS: case OPCODE_ADD: case OPCODE_MAD: case OPCODE_MUL: case OPCODE_SUB: { GLuint dst_to_src_comp[4] = {0,0,0,0}; GLuint dst_comp, arg; for (dst_comp = 0; dst_comp < 4; ++dst_comp) { if (mov->DstReg.WriteMask & (1 << dst_comp)) { const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp); ASSERT(src_comp < 4); dst_to_src_comp[dst_comp] = src_comp; } } /* Patch each source of the instruction */ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) { const GLuint arg_swz = inst->SrcReg[arg].Swizzle; inst->SrcReg[arg].Swizzle = 0; /* Reset each active component of the swizzle */ for (dst_comp = 0; dst_comp < 4; ++dst_comp) { GLuint src_comp, arg_comp; if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0) continue; src_comp = dst_to_src_comp[dst_comp]; ASSERT(src_comp < 4); arg_comp = GET_SWZ(arg_swz, src_comp); ASSERT(arg_comp < 4); inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp); } } inst->DstReg = mov->DstReg; return GL_TRUE; } /* Dot products and scalar 
instructions: we only change the destination */ case OPCODE_RCP: case OPCODE_SIN: case OPCODE_COS: case OPCODE_RSQ: case OPCODE_POW: case OPCODE_EX2: case OPCODE_LOG: case OPCODE_DP2: case OPCODE_DP3: case OPCODE_DP4: inst->DstReg = mov->DstReg; return GL_TRUE; /* All other instructions require fully active components with no swizzle */ default: if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW || inst->DstReg.WriteMask != WRITEMASK_XYZW) return GL_FALSE; inst->DstReg = mov->DstReg; return GL_TRUE; } }
/**
 * Called when the instructions of a vertex or fragment program have been
 * (re)specified.
 *
 * Flags the matching BRW_NEW_*_PROGRAM state when the program is the one
 * currently bound, gives the program a fresh id, and then scans its
 * instructions for constructs this driver rejects.
 *
 * \param ctx     GL context
 * \param target  GL_FRAGMENT_PROGRAM_ARB or GL_VERTEX_PROGRAM_ARB; other
 *                targets skip the per-target setup
 * \param prog    the program that changed
 * \return GL_TRUE if the program is accepted, GL_FALSE after an error has
 *         been reported through shader_error()
 */
static GLboolean
brwProgramStringNotify(struct gl_context *ctx,
                       GLenum target,
                       struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);
   const GLboolean is_fs = (target == GL_FRAGMENT_PROGRAM_ARB);
   int i;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
      const struct brw_fragment_program *curFP =
         brw_fragment_program_const(brw->fragment_program);
      struct gl_shader_program *shader_program;

      if (newFP == curFP)
         brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
      newFP->id = brw->program_id++;

      /* Fragment shaders handled by the new FS backend are not judged by
       * their Mesa IR, so accept them without scanning the instructions.
       */
      shader_program = _mesa_lookup_shader_program(ctx, prog->Id);
      if (shader_program &&
          shader_program->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
         return GL_TRUE;
      }
      break;
   }

   case GL_VERTEX_PROGRAM_ARB: {
      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
      const struct brw_vertex_program *curVP =
         brw_vertex_program_const(brw->vertex_program);

      if (newVP == curVP)
         brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = brw->program_id++;

      /* Also tell tnl about it: */
      _tnl_program_string(ctx, target, prog);
      break;
   }

   default:
      break;
   }

   /* Subroutines are totally broken at the moment: all program flows return
    * when any program flow returns, and the VS also hangs if a function
    * call calls a function.  Reject such programs, along with the forms of
    * relative addressing that are not supported.
    *
    * See piglit glsl-{vs,fs}-functions-[23] tests.
    */
   for (i = 0; i < prog->NumInstructions; i++) {
      struct prog_instruction *inst = prog->Instructions + i;
      int r;

      if (inst->Opcode == OPCODE_CAL) {
         shader_error(ctx, prog,
                      "i965 driver doesn't yet support uninlined function "
                      "calls. Move to using a single return statement at "
                      "the end of the function to work around it.\n");
         return GL_FALSE;
      }

      if (inst->Opcode == OPCODE_RET) {
         shader_error(ctx, prog,
                      "i965 driver doesn't yet support \"return\" "
                      "from main().\n");
         return GL_FALSE;
      }

      /* Relative addressing into the shader inputs */
      for (r = 0; r < _mesa_num_inst_src_regs(inst->Opcode); r++) {
         if (inst->SrcReg[r].RelAddr &&
             inst->SrcReg[r].File == PROGRAM_INPUT) {
            shader_error(ctx, prog,
                         "Variable indexing of shader inputs unsupported\n");
            return GL_FALSE;
         }
      }

      if (!is_fs)
         continue;

      /* Fragment programs only: relative addressing of outputs */
      if (inst->DstReg.RelAddr && inst->DstReg.File == PROGRAM_OUTPUT) {
         shader_error(ctx, prog,
                      "Variable indexing of FS outputs unsupported\n");
         return GL_FALSE;
      }

      /* Fragment programs only: relative addressing of temporaries, either
       * as the destination or in any of the three source slots (all three
       * are inspected regardless of how many the opcode uses).
       */
      {
         GLboolean indirect_temp =
            inst->DstReg.RelAddr && inst->DstReg.File == PROGRAM_TEMPORARY;

         for (r = 0; r < 3 && !indirect_temp; r++) {
            indirect_temp = inst->SrcReg[r].RelAddr &&
                            inst->SrcReg[r].File == PROGRAM_TEMPORARY;
         }

         if (indirect_temp) {
            shader_error(ctx, prog,
                         "Variable indexing of variable arrays in the FS "
                         "unsupported\n");
            return GL_FALSE;
         }
      }
   }

   return GL_TRUE;
}
/** * Try to remove use of extraneous MOV instructions, to free them up for dead * code removal. */ static void _mesa_remove_extra_move_use(struct gl_program *prog) { GLuint i, j; if (dbg) { printf("Optimize: Begin remove extra move use\n"); _mesa_print_program(prog); } /* * Look for sequences such as this: * MOV tmpX, arg0; * ... * FOO tmpY, tmpX, arg1; * and convert into: * MOV tmpX, arg0; * ... * FOO tmpY, arg0, arg1; */ for (i = 0; i + 1 < prog->NumInstructions; i++) { const struct prog_instruction *mov = prog->Instructions + i; GLuint dst_mask, src_mask; if (can_upward_mov_be_modifed(mov) == GL_FALSE) continue; /* Scanning the code, we maintain the components which are still active in * these two masks */ dst_mask = mov->DstReg.WriteMask; src_mask = get_src_arg_mask(mov, 0, NO_MASK); /* Walk through remaining instructions until the or src reg gets * rewritten or we get into some flow-control, eliminating the use of * this MOV. */ for (j = i + 1; j < prog->NumInstructions; j++) { struct prog_instruction *inst2 = prog->Instructions + j; GLuint arg; if (_mesa_is_flow_control_opcode(inst2->Opcode)) break; /* First rewrite this instruction's args if appropriate. 
*/ for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { GLuint comp, read_mask; if (inst2->SrcReg[arg].File != mov->DstReg.File || inst2->SrcReg[arg].Index != mov->DstReg.Index || inst2->SrcReg[arg].RelAddr || inst2->SrcReg[arg].Abs) continue; read_mask = get_src_arg_mask(inst2, arg, NO_MASK); /* Adjust the swizzles of inst2 to point at MOV's source if ALL the * components read still come from the mov instructions */ if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) && (read_mask & dst_mask) == read_mask) { for (comp = 0; comp < 4; comp++) { const GLuint inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); inst2->SrcReg[arg].Swizzle |= s << (3 * comp); inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> inst2_swz) & 0x1) << comp); } inst2->SrcReg[arg].File = mov->SrcReg[0].File; inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; } } /* The source of MOV is written. This potentially deactivates some * components from the src and dst of the MOV instruction */ if (inst2->DstReg.File == mov->DstReg.File && (inst2->DstReg.RelAddr || inst2->DstReg.Index == mov->DstReg.Index)) { dst_mask &= ~inst2->DstReg.WriteMask; src_mask = get_src_arg_mask(mov, 0, dst_mask); } /* Idem when the destination of mov is written */ if (inst2->DstReg.File == mov->SrcReg[0].File && (inst2->DstReg.RelAddr || inst2->DstReg.Index == mov->SrcReg[0].Index)) { src_mask &= ~inst2->DstReg.WriteMask; dst_mask &= get_dst_mask_for_mov(mov, src_mask); } if (dst_mask == 0) break; } } if (dbg) { printf("Optimize: End remove extra move use.\n"); /*_mesa_print_program(prog);*/ } }
/** * Remove dead instructions from the given program. * This is very primitive for now. Basically look for temp registers * that are written to but never read. Remove any instructions that * write to such registers. Be careful with condition code setters. */ static GLboolean _mesa_remove_dead_code_global(struct gl_program *prog) { GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4]; GLboolean *removeInst; /* per-instruction removal flag */ GLuint i, rem = 0, comp; memset(tempRead, 0, sizeof(tempRead)); if (dbg) { printf("Optimize: Begin dead code removal\n"); /*_mesa_print_program(prog);*/ } removeInst = (GLboolean *) calloc(1, prog->NumInstructions * sizeof(GLboolean)); /* Determine which temps are read and written */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; /* check src regs */ for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { const GLuint index = inst->SrcReg[j].Index; GLuint read_mask; ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); read_mask = get_src_arg_mask(inst, j, NO_MASK); if (inst->SrcReg[j].RelAddr) { if (dbg) printf("abort remove dead code (indirect temp)\n"); goto done; } for (comp = 0; comp < 4; comp++) { const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp); ASSERT(swz < 4); if ((read_mask & (1 << swz)) == 0) continue; if (swz <= SWIZZLE_W) tempRead[index][swz] = GL_TRUE; } } } /* check dst reg */ if (inst->DstReg.File == PROGRAM_TEMPORARY) { const GLuint index = inst->DstReg.Index; ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); if (inst->DstReg.RelAddr) { if (dbg) printf("abort remove dead code (indirect temp)\n"); goto done; } if (inst->CondUpdate) { /* If we're writing to this register and setting condition * codes we cannot remove the instruction. Prevent removal * by setting the 'read' flag. 
*/ tempRead[index][0] = GL_TRUE; tempRead[index][1] = GL_TRUE; tempRead[index][2] = GL_TRUE; tempRead[index][3] = GL_TRUE; } } } /* find instructions that write to dead registers, flag for removal */ for (i = 0; i < prog->NumInstructions; i++) { struct prog_instruction *inst = prog->Instructions + i; const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode); if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) { GLint chan, index = inst->DstReg.Index; for (chan = 0; chan < 4; chan++) { if (!tempRead[index][chan] && inst->DstReg.WriteMask & (1 << chan)) { if (dbg) { printf("Remove writemask on %u.%c\n", i, chan == 3 ? 'w' : 'x' + chan); } inst->DstReg.WriteMask &= ~(1 << chan); rem++; } } if (inst->DstReg.WriteMask == 0) { /* If we cleared all writes, the instruction can be removed. */ if (dbg) printf("Remove instruction %u: \n", i); removeInst[i] = GL_TRUE; } } } /* now remove the instructions which aren't needed */ rem = remove_instructions(prog, removeInst); if (dbg) { printf("Optimize: End dead code removal.\n"); printf(" %u channel writes removed\n", rem); printf(" %u instructions removed\n", rem); /*_mesa_print_program(prog);*/ } done: free(removeInst); return rem != 0; }