/* Step 3: Work forwards once again. Perform register allocations, * taking into account instructions like TEX which require contiguous * result registers. Where necessary spill registers to scratch space * and reload later. */ void brw_wm_pass2( struct brw_wm_compile *c ) { GLuint insn; GLuint i; init_registers(c); for (insn = 0; insn < c->nr_insns; insn++) { struct brw_wm_instruction *inst = &c->instruction[insn]; /* Update registers' nextuse values: */ update_register_usage(c, insn); /* May need to unspill some args. */ load_args(c, inst); /* Allocate registers to hold results: */ switch (inst->opcode) { case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXP: alloc_contiguous_dest(c, inst->dst, 4, insn); break; default: for (i = 0; i < 4; i++) { if (inst->writemask & (1<<i)) { assert(inst->dst[i]); alloc_contiguous_dest(c, &inst->dst[i], 1, insn); } } break; } if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) { for (i = 0; i < 4; i++) if (inst->dst[i]) spill_value(c, inst->dst[i]); } } if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass2"); } c->state = PASS2_DONE; if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass2/done"); } }
/*********************************************************************** * PASS 0 * * Work forwards to give each calculated value a unique number. Where * an instruction produces duplicate values (eg DP3), all are given * the same number. * * Translate away swizzling and eliminate non-saturating moves. */ void brw_wm_pass0( struct brw_wm_compile *c ) { GLuint insn; c->nr_vreg = 0; c->nr_insns = 0; pass0_init_undef(c); pass0_init_payload(c); for (insn = 0; insn < c->nr_fp_insns; insn++) { const struct prog_instruction *inst = &c->prog_instructions[insn]; /* Optimize away moves, otherwise emit translated instruction: */ switch (inst->Opcode) { case OPCODE_MOV: case OPCODE_SWZ: if (!inst->SaturateMode) { pass0_precalc_mov(c, inst); } else { translate_insn(c, inst); } break; default: translate_insn(c, inst); break; } } if (unlikely(INTEL_DEBUG & DEBUG_WM)) { brw_wm_print_program(c, "pass0"); } }
/* Step two: Basically this is dead code elimination. * * Iterate backwards over instructions, noting which values * contribute to the final result. Adjust writemasks to only * calculate these values. */ void brw_wm_pass1( struct brw_wm_compile *c ) { GLint insn; for (insn = c->nr_insns-1; insn >= 0; insn--) { struct brw_wm_instruction *inst = &c->instruction[insn]; GLuint writemask; GLuint read0, read1, read2; if (inst->opcode == OPCODE_KIL) { track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */ continue; } if (inst->opcode == WM_FB_WRITE) { track_arg(c, inst, 0, WRITEMASK_XYZW); track_arg(c, inst, 1, WRITEMASK_XYZW); if (c->source_depth_to_render_target && c->computes_depth) track_arg(c, inst, 2, WRITEMASK_Z); else track_arg(c, inst, 2, 0); continue; } /* Lookup all the registers which were written by this * instruction and get a mask of those that contribute to the output: */ writemask = get_tracked_mask(c, inst); if (!writemask) { GLuint arg; for (arg = 0; arg < 3; arg++) track_arg(c, inst, arg, 0); continue; } read0 = 0; read1 = 0; read2 = 0; /* Mark all inputs which contribute to the marked outputs: */ switch (inst->opcode) { case OPCODE_ABS: case OPCODE_FLR: case OPCODE_FRC: case OPCODE_MOV: case OPCODE_SSG: case OPCODE_SWZ: case OPCODE_TRUNC: read0 = writemask; break; case OPCODE_SUB: case OPCODE_SLT: case OPCODE_SLE: case OPCODE_SGE: case OPCODE_SGT: case OPCODE_SEQ: case OPCODE_SNE: case OPCODE_ADD: case OPCODE_MAX: case OPCODE_MIN: case OPCODE_MUL: read0 = writemask; read1 = writemask; break; case OPCODE_DDX: case OPCODE_DDY: read0 = writemask; break; case OPCODE_MAD: case OPCODE_CMP: case OPCODE_LRP: read0 = writemask; read1 = writemask; read2 = writemask; break; case OPCODE_XPD: if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; read1 = read0; break; case OPCODE_COS: case OPCODE_EX2: case OPCODE_LG2: case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: case OPCODE_SCS: case WM_CINTERP: case WM_PIXELXY: read0 = WRITEMASK_X; break; case OPCODE_POW: read0 = WRITEMASK_X; read1 = WRITEMASK_X; break; case OPCODE_TEX: case OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; case OPCODE_TXB: /* Shadow ignored for txb. */ read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; break; case WM_WPOSXY: read0 = writemask & WRITEMASK_XY; break; case WM_DELTAXY: read0 = writemask & WRITEMASK_XY; read1 = WRITEMASK_X; break; case WM_PIXELW: read0 = WRITEMASK_X; read1 = WRITEMASK_XY; break; case WM_LINTERP: read0 = WRITEMASK_X; read1 = WRITEMASK_XY; break; case WM_PINTERP: read0 = WRITEMASK_X; /* interpolant */ read1 = WRITEMASK_XY; /* deltas */ read2 = WRITEMASK_W; /* pixel w */ break; case OPCODE_DP2: read0 = WRITEMASK_XY; read1 = WRITEMASK_XY; break; case OPCODE_DP3: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZ; break; case OPCODE_DPH: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZW; break; case OPCODE_DP4: read0 = WRITEMASK_XYZW; read1 = WRITEMASK_XYZW; break; case OPCODE_LIT: read0 = WRITEMASK_XYW; break; case OPCODE_DST: case WM_FRONTFACING: default: break; } track_arg(c, inst, 0, read0); track_arg(c, inst, 1, read1); track_arg(c, inst, 2, read2); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) { brw_wm_print_program(c, "pass1"); } }