예제 #1
0
/* Walks a basic block and does copy propagation on it using the acp
 * list.
 */
bool
fs_visitor::opt_copy_propagate_local(void *mem_ctx,
				     fs_bblock *block, exec_list *acp)
{
   bool progress = false;

   for (fs_inst *inst = block->start;
	inst != block->end->next;
	inst = (fs_inst *)inst->next) {

      /* Try propagating into this instruction. */
      foreach_list(entry_node, acp) {
	 acp_entry *entry = (acp_entry *)entry_node;

	 for (int i = 0; i < 3; i++) {
	    if (try_copy_propagate(inst, i, entry))
	       progress = true;
	 }
      }

      /* kill the destination from the ACP */
      if (inst->dst.file == GRF) {
	 int start_offset = inst->dst.reg_offset;
	 int end_offset = start_offset + inst->regs_written();

	 foreach_list_safe(entry_node, acp) {
	    acp_entry *entry = (acp_entry *)entry_node;

	    if (entry->dst.file == GRF &&
		entry->dst.reg == inst->dst.reg &&
		entry->dst.reg_offset >= start_offset &&
		entry->dst.reg_offset < end_offset) {
	       entry->remove();
	       continue;
	    }
	    if (entry->src.file == GRF &&
		entry->src.reg == inst->dst.reg &&
		entry->src.reg_offset >= start_offset &&
		entry->src.reg_offset < end_offset) {
	       entry->remove();
	    }
	 }
예제 #2
0
bool
vec4_visitor::opt_copy_propagation(bool do_constant_prop)
{
   bool progress = false;
   struct copy_entry entries[alloc.total_size];

   memset(&entries, 0, sizeof(entries));

   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
      /* This pass only works on basic blocks.  If there's flow
       * control, throw out all our information and start from
       * scratch.
       *
       * This should really be fixed by using a structure like in
       * src/glsl/opt_copy_propagation.cpp to track available copies.
       */
      if (!is_dominated_by_previous_instruction(inst)) {
	 memset(&entries, 0, sizeof(entries));
	 continue;
      }

      /* For each source arg, see if each component comes from a copy
       * from the same type file (IMM, GRF, UNIFORM), and try
       * optimizing out access to the copy result
       */
      for (int i = 2; i >= 0; i--) {
	 /* Copied values end up in GRFs, and we don't track reladdr
	  * accesses.
	  */
	 if (inst->src[i].file != GRF ||
	     inst->src[i].reladdr)
	    continue;

         /* We only handle single-register copies. */
         if (inst->regs_read(i) != 1)
            continue;

	 int reg = (alloc.offsets[inst->src[i].reg] +
		    inst->src[i].reg_offset);

	 /* Find the regs that each swizzle component came from.
	  */
         struct copy_entry entry;
         memset(&entry, 0, sizeof(copy_entry));
	 int c;
	 for (c = 0; c < 4; c++) {
            int channel = BRW_GET_SWZ(inst->src[i].swizzle, c);
            entry.value[c] = entries[reg].value[channel];

	    /* If there's no available copy for this channel, bail.
	     * We could be more aggressive here -- some channels might
	     * not get used based on the destination writemask.
	     */
	    if (!entry.value[c])
	       break;

            entry.saturatemask |=
               (entries[reg].saturatemask & (1 << channel) ? 1 : 0) << c;

	    /* We'll only be able to copy propagate if the sources are
	     * all from the same file -- there's no ability to swizzle
	     * 0 or 1 constants in with source registers like in i915.
	     */
	    if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file)
	       break;
	 }

	 if (c != 4)
	    continue;

         if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
            progress = true;

	 if (try_copy_propagate(devinfo, inst, i, &entry))
	    progress = true;
      }

      /* Track available source registers. */
      if (inst->dst.file == GRF) {
	 const int reg =
	    alloc.offsets[inst->dst.reg] + inst->dst.reg_offset;

	 /* Update our destination's current channel values.  For a direct copy,
	  * the value is the newly propagated source.  Otherwise, we don't know
	  * the new value, so clear it.
	  */
	 bool direct_copy = is_direct_copy(inst);
         entries[reg].saturatemask &= ~inst->dst.writemask;
	 for (int i = 0; i < 4; i++) {
	    if (inst->dst.writemask & (1 << i)) {
               entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL;
               entries[reg].saturatemask |=
                  inst->saturate && direct_copy ? 1 << i : 0;
            }
	 }

	 /* Clear the records for any registers whose current value came from
	  * our destination's updated channels, as the two are no longer equal.
	  */
	 if (inst->dst.reladdr)
	    memset(&entries, 0, sizeof(entries));
	 else {
	    for (unsigned i = 0; i < alloc.total_size; i++) {
	       for (int j = 0; j < 4; j++) {
		  if (is_channel_updated(inst, entries[i].value, j)) {
		     entries[i].value[j] = NULL;
		     entries[i].saturatemask &= ~(1 << j);
                  }
	       }
	    }
	 }
      }
   }

   if (progress)
      invalidate_live_intervals();

   return progress;
}
예제 #3
0
bool
vec4_visitor::opt_copy_propagation(bool do_constant_prop)
{
   /* If we are in dual instanced or single mode, then attributes are going
    * to be interleaved, so one register contains two attribute slots.
    */
   const int attributes_per_reg =
      prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
   bool progress = false;
   struct copy_entry entries[alloc.total_size];

   memset(&entries, 0, sizeof(entries));

   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
      /* This pass only works on basic blocks.  If there's flow
       * control, throw out all our information and start from
       * scratch.
       *
       * This should really be fixed by using a structure like in
       * src/glsl/opt_copy_propagation.cpp to track available copies.
       */
      if (!is_dominated_by_previous_instruction(inst)) {
	 memset(&entries, 0, sizeof(entries));
	 continue;
      }

      /* For each source arg, see if each component comes from a copy
       * from the same type file (IMM, VGRF, UNIFORM), and try
       * optimizing out access to the copy result
       */
      for (int i = 2; i >= 0; i--) {
	 /* Copied values end up in GRFs, and we don't track reladdr
	  * accesses.
	  */
	 if (inst->src[i].file != VGRF ||
	     inst->src[i].reladdr)
	    continue;

         /* We only handle register-aligned single GRF copies. */
         if (inst->size_read(i) != REG_SIZE ||
             inst->src[i].offset % REG_SIZE)
            continue;

         const unsigned reg = (alloc.offsets[inst->src[i].nr] +
                               inst->src[i].offset / REG_SIZE);
         const copy_entry &entry = entries[reg];

         if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
            progress = true;
         else if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
	    progress = true;
      }

      /* Track available source registers. */
      if (inst->dst.file == VGRF) {
	 const int reg =
            alloc.offsets[inst->dst.nr] + inst->dst.offset / REG_SIZE;

	 /* Update our destination's current channel values.  For a direct copy,
	  * the value is the newly propagated source.  Otherwise, we don't know
	  * the new value, so clear it.
	  */
	 bool direct_copy = is_direct_copy(inst);
         entries[reg].saturatemask &= ~inst->dst.writemask;
	 for (int i = 0; i < 4; i++) {
	    if (inst->dst.writemask & (1 << i)) {
               entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL;
               entries[reg].saturatemask |=
                  inst->saturate && direct_copy ? 1 << i : 0;
            }
	 }

	 /* Clear the records for any registers whose current value came from
	  * our destination's updated channels, as the two are no longer equal.
	  */
	 if (inst->dst.reladdr)
	    memset(&entries, 0, sizeof(entries));
	 else {
	    for (unsigned i = 0; i < alloc.total_size; i++) {
	       for (int j = 0; j < 4; j++) {
		  if (is_channel_updated(inst, entries[i].value, j)) {
		     entries[i].value[j] = NULL;
		     entries[i].saturatemask &= ~(1 << j);
                  }
	       }
	    }
	 }
      }
   }

   if (progress)
      invalidate_live_intervals();

   return progress;
}