Example #1
void
vec4_generator::generate_code(exec_list *instructions)
{
    int last_native_insn_offset = 0;
    const char *last_annotation_string = NULL;
    const void *last_annotation_ir = NULL;

    if (unlikely(debug_flag)) {
        if (shader_prog) {
            fprintf(stderr, "Native code for %s vertex shader %d:\n",
                    shader_prog->Label ? shader_prog->Label : "unnamed",
                    shader_prog->Name);
        } else {
            fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
        }
    }

    foreach_list(node, instructions) {
        vec4_instruction *inst = (vec4_instruction *)node;
        struct brw_reg src[3], dst;

        if (unlikely(debug_flag)) {
            if (last_annotation_ir != inst->ir) {
                last_annotation_ir = inst->ir;
                if (last_annotation_ir) {
                    fprintf(stderr, "   ");
                    if (shader_prog) {
                        ((ir_instruction *) last_annotation_ir)->fprint(stderr);
                    } else {
                        const prog_instruction *vpi;
                        vpi = (const prog_instruction *) inst->ir;
                        fprintf(stderr, "%d: ", (int)(vpi - prog->Instructions));
                        _mesa_fprint_instruction_opt(stderr, vpi, 0,
                                                     PROG_PRINT_DEBUG, NULL);
                    }
                    fprintf(stderr, "\n");
                }
            }
            if (last_annotation_string != inst->annotation) {
                last_annotation_string = inst->annotation;
                if (last_annotation_string)
                    fprintf(stderr, "   %s\n", last_annotation_string);
            }
        }

        for (unsigned int i = 0; i < 3; i++) {
            src[i] = inst->get_src(this->prog_data, i);
        }
        dst = inst->get_dst();

        /* Set the default per-instruction state that the brw_* emit
         * helpers apply to each instruction they generate.
         */
        brw_set_conditionalmod(p, inst->conditional_mod);
        brw_set_predicate_control(p, inst->predicate);
        brw_set_predicate_inverse(p, inst->predicate_inverse);
        brw_set_saturate(p, inst->saturate);
        brw_set_mask_control(p, inst->force_writemask_all);

        unsigned pre_emit_nr_insn = p->nr_insn;

        generate_vec4_instruction(inst, dst, src);

        if (inst->no_dd_clear || inst->no_dd_check) {
            assert(p->nr_insn == pre_emit_nr_insn + 1 ||
                   !"no_dd_check or no_dd_clear set for IR emitting more "
                   "than 1 instruction");

            struct brw_instruction *last = &p->store[pre_emit_nr_insn];

            if (inst->no_dd_clear)
                last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
            if (inst->no_dd_check)
                last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
        }

        if (unlikely(debug_flag)) {
            brw_dump_compile(p, stderr,
                             last_native_insn_offset, p->next_insn_offset);
        }

        last_native_insn_offset = p->next_insn_offset;
    }
}
Example #2
void
brw_compact_instructions(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   void *store = p->store;
   /* For an instruction at byte offset 8*i before compaction, this is the number
    * of compacted instructions that preceded it.
    */
   int compacted_counts[p->next_insn_offset / 8];
   /* For an instruction at byte offset 8*i after compaction, this is the
    * 8-byte offset it was at before compaction.
    */
   int old_ip[p->next_insn_offset / 8];
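   /* A worked example with hypothetical offsets: if the instructions at
    * old byte offsets 0 and 16 both compact, the instruction originally
    * at byte offset 32 lands at byte offset 16, so old_ip[16 / 8] == 32 / 8
    * and compacted_counts[32 / 8] == 2.
    */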

   if (intel->gen < 6)
      return;

   int src_offset;
   int offset = 0;
   int compacted_count = 0;
   for (src_offset = 0; src_offset < p->nr_insn * 16;) {
      struct brw_instruction *src = store + src_offset;
      void *dst = store + offset;

      old_ip[offset / 8] = src_offset / 8;
      compacted_counts[src_offset / 8] = compacted_count;

      /* Keep the uncompacted form so the debug path below can verify
       * that compaction round-trips losslessly.
       */
      struct brw_instruction saved = *src;

      if (!src->header.cmpt_control &&
          brw_try_compact_instruction(p, dst, src)) {
         compacted_count++;

         if (INTEL_DEBUG) {
            struct brw_instruction uncompacted;
            brw_uncompact_instruction(intel, &uncompacted, dst);
            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
               brw_debug_compact_uncompact(intel, &saved, &uncompacted);
            }
         }

         offset += 8;
         src_offset += 16;
      } else {
         int size = src->header.cmpt_control ? 8 : 16;

         /* It appears that the end of thread SEND instruction needs to be
          * aligned, or the GPU hangs.
          */
         if ((src->header.opcode == BRW_OPCODE_SEND ||
              src->header.opcode == BRW_OPCODE_SENDC) &&
             src->bits3.generic.end_of_thread &&
             (offset & 8) != 0) {
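            /* Emit an 8-byte compacted NOP to push the SEND out to a
             * 16-byte boundary.
             */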
            struct brw_compact_instruction *align = store + offset;
            memset(align, 0, sizeof(*align));
            align->dw0.opcode = BRW_OPCODE_NOP;
            align->dw0.cmpt_ctrl = 1;
            offset += 8;
            old_ip[offset / 8] = src_offset / 8;
            dst = store + offset;
         }

         /* If we didn't compact this instruction, we need to move it down
          * into place.
          */
         if (offset != src_offset) {
            memmove(dst, src, size);
         }
         offset += size;
         src_offset += size;
      }
   }

   /* Fix up control flow offsets: branch distances were computed against
    * the uncompacted stream, so walk the compacted stream and adjust them.
    */
   p->next_insn_offset = offset;
   for (offset = 0; offset < p->next_insn_offset;) {
      struct brw_instruction *insn = store + offset;
      int this_old_ip = old_ip[offset / 8];
      int this_compacted_count = compacted_counts[this_old_ip];
      int target_old_ip, target_compacted_count;

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
      case BRW_OPCODE_HALT:
         update_uip_jip(insn, this_old_ip, compacted_counts);
         break;

      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
         if (intel->gen == 6) {
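            /* Gen6 stores the branch distance as a jump count.  Every
             * instruction compacted between here and the target shortens
             * the distance by one 8-byte unit, so subtract the number of
             * intervening compactions.
             */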
            target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count;
            target_compacted_count = compacted_counts[target_old_ip];
            insn->bits1.branch_gen6.jump_count -= (target_compacted_count -
                                                   this_compacted_count);
         } else {
            update_uip_jip(insn, this_old_ip, compacted_counts);
         }
         break;
      }

      if (insn->header.cmpt_control) {
         offset += 8;
      } else {
         offset += 16;
      }
   }

   /* p->nr_insn still counts in units of uncompacted (16-byte)
    * instructions, so divide the final byte offset by 16.  We do want to
    * be sure there's a valid instruction in any alignment padding, so that
    * the next compaction pass (for the FS 8/16 compile passes) parses
    * correctly.
    */
   if (p->next_insn_offset & 8) {
      struct brw_compact_instruction *align = store + offset;
      memset(align, 0, sizeof(*align));
      align->dw0.opcode = BRW_OPCODE_NOP;
      align->dw0.cmpt_ctrl = 1;
      p->next_insn_offset += 8;
   }
   p->nr_insn = p->next_insn_offset / 16;

   if (0) {
      fprintf(stdout, "dumping compacted program\n");
      brw_dump_compile(p, stdout, 0, p->next_insn_offset);

      int cmp = 0;
      for (offset = 0; offset < p->next_insn_offset;) {
         struct brw_instruction *insn = store + offset;

         if (insn->header.cmpt_control) {
            offset += 8;
            cmp++;
         } else {
            offset += 16;
         }
      }
      fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
              cmp * 8 * 100 / (offset + cmp * 8));
   }
}
Example #3
const GLuint *
brw_blorp_const_color_program::compile(struct brw_context *brw,
                                       GLuint *program_size)
{
   /* Set up prog_data */
   memset(&prog_data, 0, sizeof(prog_data));
   prog_data.persample_msaa_dispatch = false;
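   /* A clear writes the same value to every sample, so per-sample MSAA
    * dispatch should never be needed here.
    */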

   alloc_regs();

   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);

   struct brw_reg mrf_rt_write =
      retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);

   uint32_t mlen, msg_type;
   if (key->use_simd16_replicated_data) {
      /* The message payload is a single register with the low 4 floats/ints
       * filled with the constant clear color.
       */
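      /* Payload setup has to write all channels of the message register,
       * so temporarily disable the execution mask.
       */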
      brw_set_mask_control(&func, BRW_MASK_DISABLE);
      brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
      brw_set_mask_control(&func, BRW_MASK_ENABLE);

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
      mlen = 1;
   } else {
      for (int i = 0; i < 4; i++) {
         /* The message payload is pairs of registers for 16 pixels each of r,
          * g, b, and a.
          */
         brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(&func,
                 brw_message_reg(base_mrf + i * 2),
                 brw_vec1_grf(clear_rgba.nr, i));
         brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
      }

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
      mlen = 8;
   }
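   /* mlen is one register on the replicated path and eight (four channels
    * times two registers of 16 pixels each) otherwise.
    */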

   /* Now write to the render target and terminate the thread */
   brw_fb_WRITE(&func,
                16 /* dispatch_width */,
                base_mrf /* msg_reg_nr */,
                mrf_rt_write /* src0 */,
                msg_type,
                BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
                mlen,
                0 /* response_length */,
                true /* eot */,
                false /* header present */);

   if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
      fprintf(stderr, "Native code for BLORP clear:\n");
      brw_dump_compile(&func, stderr, 0, func.next_insn_offset);
      fprintf(stderr, "\n");
   }
   return brw_get_program(&func, program_size);
}
Example #4
void
vec4_generator::generate_code(exec_list *instructions)
{
   int last_native_insn_offset = 0;
   const char *last_annotation_string = NULL;
   const void *last_annotation_ir = NULL;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      if (shader) {
         printf("Native code for vertex shader %d:\n", prog->Name);
      } else {
         printf("Native code for vertex program %d:\n", c->vp->program.Base.Id);
      }
   }

   foreach_list(node, instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf("   ");
               if (shader) {
                  ((ir_instruction *) last_annotation_ir)->print();
               } else {
                  const prog_instruction *vpi;
                  vpi = (const prog_instruction *) inst->ir;
                  printf("%d: ", (int)(vpi - vp->Base.Instructions));
                  _mesa_fprint_instruction_opt(stdout, vpi, 0,
                                               PROG_PRINT_DEBUG, NULL);
               }
               printf("\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MACH:
         brw_set_acc_write_control(p, 1);
         brw_MACH(p, dst, src[0], src[1]);
         brw_set_acc_write_control(p, 0);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDE:
         brw_RNDE(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DPH:
         brw_DPH(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP4:
         brw_DP4(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP3:
         brw_DP3(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP2:
         brw_DP2(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
            struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
            brw_inst->header.predicate_control = inst->predicate;
         }
         break;

      case BRW_OPCODE_ELSE:
         brw_ELSE(p);
         break;
      case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
         break;

      case BRW_OPCODE_DO:
         brw_DO(p, BRW_EXECUTE_8);
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
            gen6_CONT(p);
         else
            brw_CONT(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE:
         brw_WHILE(p);
         break;

      default:
         generate_vs_instruction(inst, dst, src);
         break;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         brw_dump_compile(p, stdout,
                          last_native_insn_offset, p->next_insn_offset);
      }

      last_native_insn_offset = p->next_insn_offset;
   }
}