/**
 * Walks a basic block and does copy propagation on it using the acp list.
 *
 * \param mem_ctx  Not referenced in the portion of the function visible here.
 * \param block    Basic block to process; instructions are visited from
 *                 block->start up to and including block->end.
 * \param acp      List of acp_entry nodes describing the copies currently
 *                 available.  Entries are removed from it as instructions
 *                 overwrite the registers they refer to.
 *
 * `progress` is set to true whenever try_copy_propagate() reports success.
 *
 * NOTE(review): the definition is cut off in this view -- the closing braces
 * of the GRF check, the instruction loop and the function itself (and any
 * code between them, such as the return statement) are not shown.
 */
bool fs_visitor::opt_copy_propagate_local(void *mem_ctx, fs_bblock *block, exec_list *acp)
{
   bool progress = false;

   /* The loop stops once it steps past block->end, so the last instruction
    * of the block is processed too.
    */
   for (fs_inst *inst = block->start; inst != block->end->next; inst = (fs_inst *)inst->next) {
      /* Try propagating into this instruction: every available copy is
       * offered to each of the three operand indices 0..2.
       */
      foreach_list(entry_node, acp) {
         acp_entry *entry = (acp_entry *)entry_node;

         for (int i = 0; i < 3; i++) {
            if (try_copy_propagate(inst, i, entry))
               progress = true;
         }
      }

      /* kill the destination from the ACP */
      if (inst->dst.file == GRF) {
         /* Half-open range [start_offset, end_offset) of register offsets
          * within inst->dst.reg that this instruction writes.
          */
         int start_offset = inst->dst.reg_offset;
         int end_offset = start_offset + inst->regs_written();

         /* Entries are removed while walking the list, hence the _safe
          * iteration macro (presumably tolerant of removing the current
          * node -- confirm against the exec_list helpers).
          */
         foreach_list_safe(entry_node, acp) {
            acp_entry *entry = (acp_entry *)entry_node;

            /* The entry's destination lies in the written range. */
            if (entry->dst.file == GRF && entry->dst.reg == inst->dst.reg && entry->dst.reg_offset >= start_offset && entry->dst.reg_offset < end_offset) {
               entry->remove();
               /* Already removed; skip the source check below. */
               continue;
            }

            /* The entry's source lies in the written range. */
            if (entry->src.file == GRF && entry->src.reg == inst->dst.reg && entry->src.reg_offset >= start_offset && entry->src.reg_offset < end_offset) {
               entry->remove();
            }
         }
         /* NOTE(review): source text ends here; the remainder of the
          * function is not visible.
          */
bool vec4_visitor::opt_copy_propagation(bool do_constant_prop) { bool progress = false; struct copy_entry entries[alloc.total_size]; memset(&entries, 0, sizeof(entries)); foreach_block_and_inst(block, vec4_instruction, inst, cfg) { /* This pass only works on basic blocks. If there's flow * control, throw out all our information and start from * scratch. * * This should really be fixed by using a structure like in * src/glsl/opt_copy_propagation.cpp to track available copies. */ if (!is_dominated_by_previous_instruction(inst)) { memset(&entries, 0, sizeof(entries)); continue; } /* For each source arg, see if each component comes from a copy * from the same type file (IMM, GRF, UNIFORM), and try * optimizing out access to the copy result */ for (int i = 2; i >= 0; i--) { /* Copied values end up in GRFs, and we don't track reladdr * accesses. */ if (inst->src[i].file != GRF || inst->src[i].reladdr) continue; /* We only handle single-register copies. */ if (inst->regs_read(i) != 1) continue; int reg = (alloc.offsets[inst->src[i].reg] + inst->src[i].reg_offset); /* Find the regs that each swizzle component came from. */ struct copy_entry entry; memset(&entry, 0, sizeof(copy_entry)); int c; for (c = 0; c < 4; c++) { int channel = BRW_GET_SWZ(inst->src[i].swizzle, c); entry.value[c] = entries[reg].value[channel]; /* If there's no available copy for this channel, bail. * We could be more aggressive here -- some channels might * not get used based on the destination writemask. */ if (!entry.value[c]) break; entry.saturatemask |= (entries[reg].saturatemask & (1 << channel) ? 1 : 0) << c; /* We'll only be able to copy propagate if the sources are * all from the same file -- there's no ability to swizzle * 0 or 1 constants in with source registers like in i915. 
*/ if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file) break; } if (c != 4) continue; if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry)) progress = true; if (try_copy_propagate(devinfo, inst, i, &entry)) progress = true; } /* Track available source registers. */ if (inst->dst.file == GRF) { const int reg = alloc.offsets[inst->dst.reg] + inst->dst.reg_offset; /* Update our destination's current channel values. For a direct copy, * the value is the newly propagated source. Otherwise, we don't know * the new value, so clear it. */ bool direct_copy = is_direct_copy(inst); entries[reg].saturatemask &= ~inst->dst.writemask; for (int i = 0; i < 4; i++) { if (inst->dst.writemask & (1 << i)) { entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL; entries[reg].saturatemask |= inst->saturate && direct_copy ? 1 << i : 0; } } /* Clear the records for any registers whose current value came from * our destination's updated channels, as the two are no longer equal. */ if (inst->dst.reladdr) memset(&entries, 0, sizeof(entries)); else { for (unsigned i = 0; i < alloc.total_size; i++) { for (int j = 0; j < 4; j++) { if (is_channel_updated(inst, entries[i].value, j)) { entries[i].value[j] = NULL; entries[i].saturatemask &= ~(1 << j); } } } } } } if (progress) invalidate_live_intervals(); return progress; }
bool vec4_visitor::opt_copy_propagation(bool do_constant_prop) { /* If we are in dual instanced or single mode, then attributes are going * to be interleaved, so one register contains two attribute slots. */ const int attributes_per_reg = prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; bool progress = false; struct copy_entry entries[alloc.total_size]; memset(&entries, 0, sizeof(entries)); foreach_block_and_inst(block, vec4_instruction, inst, cfg) { /* This pass only works on basic blocks. If there's flow * control, throw out all our information and start from * scratch. * * This should really be fixed by using a structure like in * src/glsl/opt_copy_propagation.cpp to track available copies. */ if (!is_dominated_by_previous_instruction(inst)) { memset(&entries, 0, sizeof(entries)); continue; } /* For each source arg, see if each component comes from a copy * from the same type file (IMM, VGRF, UNIFORM), and try * optimizing out access to the copy result */ for (int i = 2; i >= 0; i--) { /* Copied values end up in GRFs, and we don't track reladdr * accesses. */ if (inst->src[i].file != VGRF || inst->src[i].reladdr) continue; /* We only handle register-aligned single GRF copies. */ if (inst->size_read(i) != REG_SIZE || inst->src[i].offset % REG_SIZE) continue; const unsigned reg = (alloc.offsets[inst->src[i].nr] + inst->src[i].offset / REG_SIZE); const copy_entry &entry = entries[reg]; if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry)) progress = true; else if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg)) progress = true; } /* Track available source registers. */ if (inst->dst.file == VGRF) { const int reg = alloc.offsets[inst->dst.nr] + inst->dst.offset / REG_SIZE; /* Update our destination's current channel values. For a direct copy, * the value is the newly propagated source. Otherwise, we don't know * the new value, so clear it. 
*/ bool direct_copy = is_direct_copy(inst); entries[reg].saturatemask &= ~inst->dst.writemask; for (int i = 0; i < 4; i++) { if (inst->dst.writemask & (1 << i)) { entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL; entries[reg].saturatemask |= inst->saturate && direct_copy ? 1 << i : 0; } } /* Clear the records for any registers whose current value came from * our destination's updated channels, as the two are no longer equal. */ if (inst->dst.reladdr) memset(&entries, 0, sizeof(entries)); else { for (unsigned i = 0; i < alloc.total_size; i++) { for (int j = 0; j < 4; j++) { if (is_channel_updated(inst, entries[i].value, j)) { entries[i].value[j] = NULL; entries[i].saturatemask &= ~(1 << j); } } } } } } if (progress) invalidate_live_intervals(); return progress; }