Ejemplo n.º 1
0
/**
 * Run instruction compaction on \p p and return the resulting program.
 *
 * \param p   compile state; handed to brw_compact_instructions() before its
 *            end offset is sampled.
 * \param sz  out: receives p->next_insn_offset as read after compaction.
 * \return    p->store, viewed as a read-only array of unsigned.
 */
const unsigned *brw_get_program( struct brw_compile *p,
			       unsigned *sz )
{
   unsigned end_offset;

   brw_compact_instructions(p);

   /* Sample the offset only after compaction has run. */
   end_offset = p->next_insn_offset;
   *sz = end_offset;

   return (const unsigned *)p->store;
}
Ejemplo n.º 2
0
/**
 * Generate the fixed-function geometry shader program described by \p key
 * and hand it to brw_upload_cache() as a BRW_CACHE_FF_GS_PROG entry.
 *
 * On gen >= 6 the program is produced by gen6_sol_program().  On older
 * generations only quad lists, quad strips and line loops get a program;
 * for any other primitive the function returns without uploading anything.
 *
 * \param brw  context supplying the device info, the VS VUE map, the program
 *             cache and the ff_gs state that receives the upload result.
 * \param key  program key; copied into the compile state and used as the
 *             cache key.
 */
void
brw_codegen_ff_gs_prog(struct brw_context *brw,
                       struct brw_ff_gs_prog_key *key)
{
   struct brw_ff_gs_compile c;
   /* Scratch context handed to brw_init_codegen(); freed on every exit. */
   void *mem_ctx = ralloc_context(NULL);
   const GLuint *program;
   GLuint program_size;

   memset(&c, 0, sizeof c);
   c.key = *key;
   c.vue_map = brw->vs.prog_data->base.vue_map;
   /* Half the slot count, rounded up. */
   c.nr_regs = (c.vue_map.num_slots + 1) / 2;

   /* Start code generation. */
   brw_init_codegen(brw->intelScreen->devinfo, &c.func, mem_ctx);
   c.func.single_program_flow = 1;

   /* The thread is, for some reason, spawned with only 4 channels
    * unmasked, so turn masking off.
    */
   brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE);

   if (brw->gen < 6) {
      /* Gen4-5: the GS decomposes certain primitive types.  Primitives
       * which don't need a GS program have already been filtered out by
       * the time we get here.
       */
      switch (key->primitive) {
      case _3DPRIM_QUADLIST:
         brw_ff_gs_quads(&c, key);
         break;
      case _3DPRIM_QUADSTRIP:
         brw_ff_gs_quad_strip(&c, key);
         break;
      case _3DPRIM_LINELOOP:
         brw_ff_gs_lines(&c);
         break;
      default:
         /* Nothing to build for this primitive. */
         ralloc_free(mem_ctx);
         return;
      }
   } else {
      /* Sandybridge: the GS implements transform feedback (the PRM calls
       * it "Stream Out").
       */
      unsigned num_verts;
      /* Only the quad/polygon group below asks for the edge flag check. */
      bool check_edge_flag = false;

      switch (key->primitive) {
      case _3DPRIM_POINTLIST:
         num_verts = 1;
         break;
      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         num_verts = 2;
         break;
      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
         num_verts = 3;
         break;
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         num_verts = 3;
         check_edge_flag = true;
         break;
      default:
         unreachable("Unexpected primitive type in Gen6 SOL program.");
      }

      gen6_sol_program(&c, key, num_verts, check_edge_flag);
   }

   brw_compact_instructions(&c.func, 0, 0, NULL);

   /* Fetch the finished program and its size. */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
      fprintf(stderr, "gs:\n");
      brw_disassemble(brw->intelScreen->devinfo, c.func.store,
                      0, program_size, stderr);
      fprintf(stderr, "\n");
   }

   brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG,
                    &c.key, sizeof c.key,
                    program, program_size,
                    &c.prog_data, sizeof c.prog_data,
                    &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data);

   ralloc_free(mem_ctx);
}
Ejemplo n.º 3
0
/**
 * Generate the clip program described by \p key and hand it to
 * brw_upload_cache() as a BRW_CACHE_CLIP_PROG entry.
 *
 * \param brw  context supplying the device info, the geometry-output VUE
 *             map, the program cache and the clip state that receives the
 *             upload result.
 * \param key  program key; copied into the compile state and used as the
 *             cache key.  key->primitive selects which emitter runs.
 */
static void compile_clip_prog( struct brw_context *brw,
			     struct brw_clip_prog_key *key )
{
   struct brw_clip_compile c;
   /* Scratch context handed to brw_init_codegen(); freed before returning. */
   void *mem_ctx = ralloc_context(NULL);
   const GLuint *program;
   GLuint program_size;

   memset(&c, 0, sizeof c);

   c.key = *key;
   c.vue_map = brw->vue_map_geom_out;

   /* nr_regs counts the registers filled by reading data from the VUE.
    * The program touches the whole VUE, so this is the VUE size measured
    * in slot pairs (each register holds two slots), rounded up.
    */
   c.nr_regs = (c.vue_map.num_slots + 1) / 2;

   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */

   /* Start code generation. */
   brw_init_codegen(&brw->screen->devinfo, &c.func, mem_ctx);
   c.func.single_program_flow = 1;

   /* The thread is, for some reason, spawned with only 4 channels
    * unmasked, so turn masking off.
    */
   brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE);

   /* Ideally a single program could handle all three primitive kinds;
    * for now pick one emitter per key.
    */
   switch (key->primitive) {
   case GL_POINTS:
      brw_emit_point_clip(&c);
      break;
   case GL_LINES:
      brw_emit_line_clip(&c);
      break;
   case GL_TRIANGLES:
      if (!key->do_unfilled)
         brw_emit_tri_clip(&c);
      else
         brw_emit_unfilled_clip(&c);
      break;
   default:
      unreachable("not reached");
   }

   brw_compact_instructions(&c.func, 0, 0, NULL);

   /* Fetch the finished program and its size. */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) {
      fprintf(stderr, "clip:\n");
      brw_disassemble(&brw->screen->devinfo, c.func.store,
                      0, program_size, stderr);
      fprintf(stderr, "\n");
   }

   brw_upload_cache(&brw->cache,
                    BRW_CACHE_CLIP_PROG,
                    &c.key, sizeof c.key,
                    program, program_size,
                    &c.prog_data, sizeof c.prog_data,
                    &brw->clip.prog_offset, &brw->clip.prog_data);

   ralloc_free(mem_ctx);
}
/**
 * Emit the BLORP constant-color clear program and return it.
 *
 * The program moves the clear color into the message registers starting at
 * base_mrf and issues a single render target write with end-of-thread set.
 * Two payload layouts are supported, selected by
 * key->use_simd16_replicated_data.
 *
 * \param brw           context, used here only for the debug disassembly.
 * \param program_size  out: filled in by brw_get_program().
 * \return              the program as returned by brw_get_program().
 */
const GLuint *
brw_blorp_const_color_program::compile(struct brw_context *brw,
                                       GLuint *program_size)
{
   /* Set up prog_data */
   memset(&prog_data, 0, sizeof(prog_data));
   prog_data.persample_msaa_dispatch = false;

   alloc_regs();

   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);

   struct brw_reg mrf_rt_write =
      retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);

   uint32_t mlen, msg_type;
   if (key->use_simd16_replicated_data) {
      /* The message payload is a single register with the low 4 floats/ints
       * filled with the constant clear color.
       */
      brw_set_mask_control(&func, BRW_MASK_DISABLE);
      brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
      brw_set_mask_control(&func, BRW_MASK_ENABLE);

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
      mlen = 1;
   } else {
      for (int i = 0; i < 4; i++) {
         /* The message payload is pairs of registers for 16 pixels each of r,
          * g, b, and a.
          */
         brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(&func,
                 brw_message_reg(base_mrf + i * 2),
                 brw_vec1_grf(clear_rgba.nr, i));
         brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
      }

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
      mlen = 8;
   }

   /* Now write to the render target and terminate the thread */
   brw_fb_WRITE(&func,
                16 /* dispatch_width */,
                base_mrf /* msg_reg_nr */,
                mrf_rt_write /* src0 */,
                msg_type,
                BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
                mlen,
                0 /* response_length */,
                true /* eot */,
                false /* header present */);

   if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
      fprintf(stderr, "Native code for BLORP clear:\n");
      /* Disassemble the instruction store itself (func.store), not the
       * address of the struct member that holds it.  Note this dump happens
       * before compaction, so it shows the uncompacted instructions.
       */
      brw_disassemble(brw, func.store, 0, func.next_insn_offset, stderr);
      fprintf(stderr, "\n");
   }

   brw_compact_instructions(&func);
   return brw_get_program(&func, program_size);
}
Ejemplo n.º 5
0
/**
 * Emit native code for every vec4 instruction in \p cfg, then resolve
 * jump targets and compact the result.
 *
 * For each IR instruction the default predicate, saturate, mask and
 * accumulator-write state is set from the instruction before
 * generate_vec4_instruction() runs.  When debug_flag is set, annotations are
 * collected along the way and the final assembly is dumped to stderr.
 *
 * \param cfg  control flow graph whose blocks and instructions are walked.
 */
void
vec4_generator::generate_code(const cfg_t *cfg)
{
   struct annotation_info annotation;
   memset(&annotation, 0, sizeof(annotation));

   foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
      struct brw_reg dst;
      struct brw_reg src[3];

      if (unlikely(debug_flag))
         annotate(brw, &annotation, cfg, inst, p->next_insn_offset);

      for (unsigned int s = 0; s < 3; s++)
         src[s] = inst->get_src(this->prog_data, s);
      dst = inst->get_dst();

      /* Per-instruction defaults picked up by the emitters. */
      brw_set_default_predicate_control(p, inst->predicate);
      brw_set_default_predicate_inverse(p, inst->predicate_inverse);
      brw_set_default_saturate(p, inst->saturate);
      brw_set_default_mask_control(p, inst->force_writemask_all);
      brw_set_default_acc_write_control(p, inst->writes_accumulator);

      /* Remember where this IR instruction's native code starts. */
      const unsigned first_new_insn = p->nr_insn;

      generate_vec4_instruction(inst, dst, src);

      const bool needs_fixup =
         inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod;

      if (needs_fixup) {
         /* These fields are patched onto a single native instruction, so
          * the IR instruction must have expanded to exactly one.
          */
         assert(p->nr_insn == first_new_insn + 1 ||
                !"conditional_mod, no_dd_check, or no_dd_clear set for IR "
                 "emitting more than 1 instruction");

         brw_inst *emitted = &p->store[first_new_insn];

         brw_inst_set_cond_modifier(brw, emitted, inst->conditional_mod);
         brw_inst_set_no_dd_clear(brw, emitted, inst->no_dd_clear);
         brw_inst_set_no_dd_check(brw, emitted, inst->no_dd_check);
      }
   }

   brw_set_uip_jip(p);
   annotation_finalize(&annotation, p->next_insn_offset);

   /* Sample the end offset on both sides of compaction for the report. */
   int uncompacted_size = p->next_insn_offset;
   brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann);
   int compacted_size = p->next_insn_offset;

   if (unlikely(debug_flag)) {
      if (!shader_prog) {
         fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
      } else {
         fprintf(stderr, "Native code for %s vertex shader %d:\n",
                 shader_prog->Label ? shader_prog->Label : "unnamed",
                 shader_prog->Name);
      }
      fprintf(stderr, "vec4 shader: %d instructions. Compacted %d to %d"
                      " bytes (%.0f%%)\n",
              uncompacted_size / 16, uncompacted_size, compacted_size,
              100.0f * (uncompacted_size - compacted_size) / uncompacted_size);

      dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog);
      ralloc_free(annotation.ann);
   }
}
Ejemplo n.º 6
0
/**
 * Generate the SF program described by \p key and hand it to
 * brw_upload_cache() as a BRW_SF_PROG entry.
 *
 * \param brw  context supplying the geometry-output VUE map, the program
 *             cache and the sf state that receives the upload result.
 * \param key  program key; copied into the compile state and used as the
 *             cache key.  key->primitive selects which setup emitter runs.
 */
static void compile_sf_prog( struct brw_context *brw,
			     struct brw_sf_prog_key *key )
{
   struct brw_sf_compile c;
   /* Scratch context handed to brw_init_compile(); freed before returning. */
   void *mem_ctx = ralloc_context(NULL);
   const GLuint *program;
   GLuint program_size;

   memset(&c, 0, sizeof c);

   /* Start the compilation. */
   brw_init_compile(brw, &c.func, mem_ctx);

   c.key = *key;
   c.vue_map = brw->vue_map_geom_out;

   if (c.key.do_point_coord) {
      /* gl_PointCoord is an FS builtin rather than a VS one, so the VUE map
       * generated at the VS stage has no slot for it.  Append one by hand
       * so the SF shader produces the interpolation coefficient the FS
       * shader needs.
       */
      c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
      c.vue_map.slot_to_varying[c.vue_map.num_slots] = BRW_VARYING_SLOT_PNTC;
      c.vue_map.num_slots++;
   }

   c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
   c.nr_attr_regs = (c.vue_map.num_slots + 1) / 2 - c.urb_entry_read_offset;
   c.nr_setup_regs = c.nr_attr_regs;

   c.prog_data.urb_read_length = c.nr_attr_regs;
   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
   c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode);

   /* Pick the setup emitter (and vertex count) for the keyed primitive. */
   switch (key->primitive) {
   case SF_POINTS:
      c.nr_verts = 1;
      if (!key->do_point_sprite)
         brw_emit_point_setup(&c, true);
      else
         brw_emit_point_sprite_setup(&c, true);
      break;
   case SF_LINES:
      c.nr_verts = 2;
      brw_emit_line_setup(&c, true);
      break;
   case SF_TRIANGLES:
      c.nr_verts = 3;
      brw_emit_tri_setup(&c, true);
      break;
   case SF_UNFILLED_TRIS:
      c.nr_verts = 3;
      brw_emit_anyprim_setup(&c);
      break;
   default:
      unreachable("not reached");
   }

   brw_compact_instructions(&c.func, 0, 0, NULL);

   /* Fetch the finished program and its size. */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_SF)) {
      fprintf(stderr, "sf:\n");
      brw_disassemble(brw, c.func.store, 0, program_size, stderr);
      fprintf(stderr, "\n");
   }

   brw_upload_cache(&brw->cache, BRW_SF_PROG,
                    &c.key, sizeof c.key,
                    program, program_size,
                    &c.prog_data, sizeof c.prog_data,
                    &brw->sf.prog_offset, &brw->sf.prog_data);

   ralloc_free(mem_ctx);
}