コード例 #1
0
ファイル: brw_vec4_generator.cpp プロジェクト: iquiw/xsrc
/**
 * Generate native code for the given control-flow graph.
 *
 * \param cfg            control-flow graph handed to generate_code()
 * \param assembly_size  passed through to brw_get_program(), which reports
 *                       the size of the returned program through it
 * \return the program returned by brw_get_program()
 */
const unsigned *
vec4_generator::generate_assembly(const cfg_t *cfg,
                                  unsigned *assembly_size)
{
   /* Make Align16 the default access mode before any code is emitted. */
   brw_set_default_access_mode(p, BRW_ALIGN_16);

   generate_code(cfg);

   const unsigned *assembly = brw_get_program(p, assembly_size);
   return assembly;
}
コード例 #2
0
ファイル: brw_vec4_generator.cpp プロジェクト: rib/mesa
/**
 * Generate native code for the given instruction list.
 *
 * \param instructions   instruction list handed to generate_code()
 * \param assembly_size  passed through to brw_get_program(), which reports
 *                       the size of the returned program through it
 * \return the program returned by brw_get_program()
 */
const unsigned *
vec4_generator::generate_assembly(exec_list *instructions,
                                  unsigned *assembly_size)
{
    /* Make Align16 the default access mode before any code is emitted. */
    brw_set_default_access_mode(p, BRW_ALIGN_16);

    generate_code(instructions);

    const unsigned *assembly = brw_get_program(p, assembly_size);
    return assembly;
}
コード例 #3
0
ファイル: brw_sf.c プロジェクト: PatriceBlin/mesa
/**
 * Build the SF program selected by \p key and store it in the program cache.
 *
 * Code is generated into a temporary ralloc context, uploaded under
 * BRW_SF_PROG (filling brw->sf.prog_offset and brw->sf.prog_data), and the
 * temporary context is released before returning on every path.
 *
 * \param brw  context supplying the geometry-output VUE map, the hardware
 *             generation used for disassembly, and the program cache
 * \param key  SF program key; copied into the compile state
 */
static void compile_sf_prog( struct brw_context *brw,
                             struct brw_sf_prog_key *key )
{
    struct brw_sf_compile c;
    const GLuint *program;
    void *mem_ctx;
    GLuint program_size;
    GLuint i;

    memset(&c, 0, sizeof(c));

    mem_ctx = ralloc_context(NULL);
    /* Begin the compilation:
     */
    brw_init_compile(brw, &c.func, mem_ctx);

    c.key = *key;
    c.vue_map = brw->vue_map_geom_out;
    if (c.key.do_point_coord) {
        /*
         * gl_PointCoord is a FS instead of VS builtin variable, thus it's
         * not included in c.vue_map generated in VS stage. Here we add
         * it manually to let SF shader generate the needed interpolation
         * coefficient for FS shader.
         */
        c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
        c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
    }
    c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
    /* Two VUE slots share one register, hence the round-up division. */
    c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
    c.nr_setup_regs = c.nr_attr_regs;

    c.prog_data.urb_read_length = c.nr_attr_regs;
    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
    c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode);

    /* Which primitive?  Or all three?
     */
    switch (key->primitive) {
    case SF_TRIANGLES:
        c.nr_verts = 3;
        brw_emit_tri_setup( &c, true );
        break;
    case SF_LINES:
        c.nr_verts = 2;
        brw_emit_line_setup( &c, true );
        break;
    case SF_POINTS:
        c.nr_verts = 1;
        if (key->do_point_sprite)
            brw_emit_point_sprite_setup( &c, true );
        else
            brw_emit_point_setup( &c, true );
        break;
    case SF_UNFILLED_TRIS:
        c.nr_verts = 3;
        brw_emit_anyprim_setup( &c );
        break;
    default:
        assert(0);
        /* With assertions compiled out we still reach this return, so the
         * compile context must be released here as well.
         */
        ralloc_free(mem_ctx);
        return;
    }

    /* get the program
     */
    program = brw_get_program(&c.func, &program_size);

    if (unlikely(INTEL_DEBUG & DEBUG_SF)) {
        printf("sf:\n");
        for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
            brw_disasm(stdout, &((struct brw_instruction *)program)[i],
                       brw->gen);
        printf("\n");
    }

    brw_upload_cache(&brw->cache, BRW_SF_PROG,
                     &c.key, sizeof(c.key),
                     program, program_size,
                     &c.prog_data, sizeof(c.prog_data),
                     &brw->sf.prog_offset, &brw->sf.prog_data);
    ralloc_free(mem_ctx);
}
コード例 #4
0
ファイル: brw_sf.c プロジェクト: toastpp/toastpp
/**
 * Build the SF program selected by \p key and upload it to the program cache.
 *
 * The previously bound brw->sf.prog_bo is unreferenced and replaced by the
 * buffer returned from brw_upload_cache().
 *
 * \param brw  context supplying the GL point state and the program cache
 * \param key  SF program key; copied into the compile state
 */
static void compile_sf_prog( struct brw_context *brw,
                             struct brw_sf_prog_key *key )
{
   GLcontext *ctx = &brw->intel.ctx;
   struct brw_sf_compile c;
   const GLuint *program;
   GLuint program_size;
   GLuint attr;
   GLuint slot = 0;

   memset(&c, 0, sizeof(c));

   /* Start a fresh compile. */
   brw_init_compile(brw, &c.func);

   c.key = *key;

   /* Attribute counts come from the key's bitmask; two attributes share one
    * register, hence the round-up divisions.
    */
   c.nr_attrs = brw_count_bits(c.key.attrs);
   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
   c.nr_attr_regs = (c.nr_attrs+1)/2;
   c.nr_setup_regs = (c.nr_setup_attrs+1)/2;

   c.prog_data.urb_read_length = c.nr_attr_regs;
   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;

   /* Build the two-way mapping between attribute number and its packed
    * position in the vertex, and record per-attribute CoordReplace state.
    */
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c.key.attrs & (1<<attr)))
         continue;

      c.attr_to_idx[attr] = slot;
      c.idx_to_attr[slot] = attr;
      slot++;

      /* Only texture coordinate outputs can have CoordReplace enabled. */
      if (attr >= VERT_RESULT_TEX0 && attr <= VERT_RESULT_TEX7)
         c.point_attrs[attr].CoordReplace =
            ctx->Point.CoordReplace[attr - VERT_RESULT_TEX0];
      else
         c.point_attrs[attr].CoordReplace = GL_FALSE;
   }

   /* Emit the setup code matching the requested primitive. */
   switch (key->primitive) {
   case SF_TRIANGLES:
      c.nr_verts = 3;
      brw_emit_tri_setup( &c, GL_TRUE );
      break;
   case SF_LINES:
      c.nr_verts = 2;
      brw_emit_line_setup( &c, GL_TRUE );
      break;
   case SF_POINTS:
      c.nr_verts = 1;
      if (key->do_point_sprite)
         brw_emit_point_sprite_setup( &c, GL_TRUE );
      else
         brw_emit_point_setup( &c, GL_TRUE );
      break;
   case SF_UNFILLED_TRIS:
      c.nr_verts = 3;
      brw_emit_anyprim_setup( &c );
      break;
   default:
      assert(0);
      return;
   }

   /* Fetch the generated program. */
   program = brw_get_program(&c.func, &program_size);

   /* Swap the bound program buffer for the freshly uploaded one. */
   dri_bo_unreference(brw->sf.prog_bo);
   brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
                                       &c.key, sizeof(c.key),
                                       NULL, 0,
                                       program, program_size,
                                       &c.prog_data,
                                       &brw->sf.prog_data );
}
コード例 #5
0
ファイル: brw_vs.c プロジェクト: jay8muel/Renderfusion
/**
 * Compile a vertex program for \p key and upload it to the program cache.
 *
 * \param brw   context supplying the program cache and scratch buffer state
 * \param prog  linked GLSL program, or NULL; when non-NULL the code is
 *              emitted with brw_vs_emit(), otherwise with brw_old_vs_emit()
 * \param vp    vertex program being compiled
 * \param key   program key; copied into the compile state
 * \return false if brw_vs_emit() fails, true otherwise
 */
static bool
do_vs_prog(struct brw_context *brw,
           struct gl_shader_program *prog,
           struct brw_vertex_program *vp,
           struct brw_vs_prog_key *key)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
   GLuint program_size;
   const GLuint *program;
   struct brw_vs_compile c;
   void *mem_ctx;
   int aux_size;
   int i;
   struct gl_shader *vs = NULL;

   if (prog)
      vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];

   memset(&c, 0, sizeof(c));
   memcpy(&c.key, key, sizeof(*key));

   mem_ctx = ralloc_context(NULL);

   brw_init_compile(brw, &c.func, mem_ctx);
   c.vp = vp;

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    */
   int param_count;
   if (vs) {
      /* We add padding around uniform values below vec4 size, with the worst
       * case being a float value that gets blown up to a vec4, so be
       * conservative here.
       */
      param_count = vs->num_uniform_components * 4;

      /* We also upload clip plane data as uniforms */
      param_count += MAX_CLIP_PLANES * 4;
   } else {
      param_count = vp->program.Base.Parameters->NumParameters * 4;
   }
   /* These two arrays deliberately have no ralloc parent: they must outlive
    * mem_ctx because the uploaded prog_data keeps pointing at them.
    */
   c.prog_data.param = rzalloc_array(NULL, const float *, param_count);
   c.prog_data.pull_param = rzalloc_array(NULL, const float *, param_count);

   c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
   c.prog_data.inputs_read = vp->program.Base.InputsRead;

   if (c.key.copy_edgeflag) {
      c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE);
      c.prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
   }

   /* Put dummy slots into the VUE for the SF to put the replaced
    * point sprite coords in.  We shouldn't need these dummy slots,
    * which take up precious URB space, but it would mean that the SF
    * doesn't get nice aligned pairs of input coords into output
    * coords, which would be a pain to handle.
    */
   for (i = 0; i < 8; i++) {
      if (c.key.point_coord_replace & (1 << i))
         c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
   }

   brw_compute_vue_map(&c);

   if (0) {
      _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG,
                               true);
   }

   /* Emit GEN4 code.
    */
   if (prog) {
      if (!brw_vs_emit(prog, &c)) {
         /* Nothing gets uploaded on this path, so nobody else will ever own
          * the parent-less uniform arrays; release them here along with the
          * compile context to avoid leaking them on every failed compile.
          */
         ralloc_free(c.prog_data.param);
         ralloc_free(c.prog_data.pull_param);
         ralloc_free(mem_ctx);
         return false;
      }
   } else {
      brw_old_vs_emit(&c);
   }

   /* Later checks override earlier ones, so the last matching condition
    * determines the surface count.
    */
   if (c.prog_data.nr_pull_params)
      c.prog_data.num_surfaces = 1;
   if (c.vp->program.Base.SamplersUsed)
      c.prog_data.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT);
   if (prog &&
       prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) {
      c.prog_data.num_surfaces =
         SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks);
   }

   /* Scratch space is used for register spilling */
   if (c.last_scratch) {
      perf_debug("Vertex shader triggered register spilling.  "
                 "Try reducing the number of live vec4 values to "
                 "improve performance.\n");

      c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);

      brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
                         c.prog_data.total_scratch * brw->max_vs_threads);
   }

   /* get the program
    */
   program = brw_get_program(&c.func, &program_size);

   /* We upload from &c.prog_data including the constant_map assuming
    * they're packed together.  It would be nice to have a
    * compile-time assert macro here.
    */
   assert(c.constant_map == (int8_t *)&c.prog_data +
          sizeof(c.prog_data));
   assert(ctx->Const.VertexProgram.MaxNativeParameters ==
          ARRAY_SIZE(c.constant_map));
   (void) ctx;

   aux_size = sizeof(c.prog_data);
   /* constant_map */
   aux_size += c.vp->program.Base.Parameters->NumParameters;

   brw_upload_cache(&brw->cache, BRW_VS_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, aux_size,
                    &brw->vs.prog_offset, &brw->vs.prog_data);
   ralloc_free(mem_ctx);

   return true;
}
コード例 #6
0
ファイル: brw_gs.c プロジェクト: aljen/haiku-opengl
/**
 * Build the GS program selected by \p key and upload it to the GS cache.
 *
 * Returns without uploading anything when the primitive has no GS program,
 * or when it only gets one with key->hint_gs_always set and the hint is off.
 *
 * \param brw  context owning the program caches
 * \param key  GS program key; copied into the compile state
 */
static void compile_gs_prog( struct brw_context *brw,
                             struct brw_gs_prog_key *key )
{
   struct brw_gs_compile c;
   const unsigned *program;
   unsigned program_size;

   memset(&c, 0, sizeof(c));
   c.key = *key;

   /* Need to locate the two positions present in vertex + header.
    * These are currently hardcoded:
    */
   c.nr_attrs = brw_count_bits(c.key.attrs);
   c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
   c.nr_bytes = c.nr_regs * REG_SIZE;

   /* Start a fresh compile. */
   brw_init_compile(&c.func);
   c.func.single_program_flow = 1;

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.
    */
   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);

   /* Primitives that never need a GS program were filtered out earlier;
    * lines, triangles and points only get one when the key asks for it.
    */
   switch (key->primitive) {
   case PIPE_PRIM_QUADS:
      brw_gs_quads( &c );
      break;
   case PIPE_PRIM_QUAD_STRIP:
      brw_gs_quad_strip( &c );
      break;
   case PIPE_PRIM_LINE_LOOP:
      brw_gs_lines( &c );
      break;
   case PIPE_PRIM_LINES:
      if (!key->hint_gs_always)
         return;
      brw_gs_lines( &c );
      break;
   case PIPE_PRIM_TRIANGLES:
      if (!key->hint_gs_always)
         return;
      brw_gs_tris( &c );
      break;
   case PIPE_PRIM_POINTS:
      if (!key->hint_gs_always)
         return;
      brw_gs_points( &c );
      break;
   default:
      return;
   }

   /* Fetch the generated program. */
   program = brw_get_program(&c.func, &program_size);

   /* Upload it and remember where it landed. */
   brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG],
                                              &c.key,
                                              sizeof(c.key),
                                              program,
                                              program_size,
                                              &c.prog_data,
                                              &brw->gs.prog_data );
}
コード例 #7
0
/**
 * Emit the native code for the BLORP constant-color clear program.
 *
 * The program loads the clear color into the message registers, issues a
 * render target write with end-of-thread set, and returns the result of
 * brw_get_program().
 *
 * \param brw           driver context (not referenced directly in this body)
 * \param program_size  passed through to brw_get_program()
 * \return the program returned by brw_get_program()
 */
const GLuint *
brw_blorp_const_color_program::compile(struct brw_context *brw,
                                       GLuint *program_size)
{
   /* Set up prog_data */
   memset(&prog_data, 0, sizeof(prog_data));
   prog_data.persample_msaa_dispatch = false;

   alloc_regs();

   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);

   /* Source operand of the render target write: base_mrf viewed as a
    * 16-wide float register.
    */
   struct brw_reg mrf_rt_write =
      retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);

   uint32_t mlen, msg_type;
   if (key->use_simd16_replicated_data) {
      /* The message payload is a single register with the low 4 floats/ints
       * filled with the constant clear color.
       */
      /* Masking is disabled only around this one MOV and re-enabled right
       * after it.
       */
      brw_set_mask_control(&func, BRW_MASK_DISABLE);
      brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
      brw_set_mask_control(&func, BRW_MASK_ENABLE);

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
      mlen = 1;
   } else {
      for (int i = 0; i < 4; i++) {
         /* The message payload is pairs of registers for 16 pixels each of r,
          * g, b, and a.
          */
         /* Each MOV is emitted compressed; compression is switched back off
          * after every iteration.
          */
         brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(&func,
                 brw_message_reg(base_mrf + i * 2),
                 brw_vec1_grf(clear_rgba.nr, i));
         brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
      }

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
      /* 4 channels x 2 registers each. */
      mlen = 8;
   }

   /* Now write to the render target and terminate the thread */
   brw_fb_WRITE(&func,
                16 /* dispatch_width */,
                base_mrf /* msg_reg_nr */,
                mrf_rt_write /* src0 */,
                msg_type,
                BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
                mlen,
                0 /* response_length */,
                true /* eot */,
                false /* header present */);

   /* Optional debug dump of the generated code to stderr. */
   if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
      fprintf(stderr, "Native code for BLORP clear:\n");
      brw_dump_compile(&func, stderr, 0, func.next_insn_offset);
      fprintf(stderr, "\n");
   }
   return brw_get_program(&func, program_size);
}
コード例 #8
0
ファイル: brw_sf.c プロジェクト: aljen/haiku-opengl
/**
 * Build the SF program selected by \p key and upload it to the SF cache.
 *
 * Only triangles, lines and points are handled; any other primitive
 * (including SF_UNFILLED_TRIS) asserts and returns without uploading.
 *
 * \param brw  context owning the program caches
 * \param key  SF program key; copied into the compile state
 */
static void compile_sf_prog( struct brw_context *brw,
                             struct brw_sf_prog_key *key )
{
   struct brw_sf_compile c;
   const unsigned *program;
   unsigned program_size;

   memset(&c, 0, sizeof(c));

   /* Start a fresh compile. */
   brw_init_compile(&c.func);

   c.key = *key;

   /* Attribute counts come straight from the key; two attributes share one
    * register, hence the round-up divisions.
    */
   c.nr_attrs = c.key.vp_output_count;
   c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */
   c.nr_attr_regs = (c.nr_attrs+1)/2;
   c.nr_setup_regs = (c.nr_setup_attrs+1)/2;

   c.prog_data.urb_read_length = c.nr_attr_regs;
   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;

   /* Emit the setup code matching the requested primitive. */
   switch (key->primitive) {
   case SF_TRIANGLES:
      c.nr_verts = 3;
      brw_emit_tri_setup( &c );
      break;
   case SF_LINES:
      c.nr_verts = 2;
      brw_emit_line_setup( &c );
      break;
   case SF_POINTS:
      c.nr_verts = 1;
      brw_emit_point_setup( &c );
      break;

   case SF_UNFILLED_TRIS:
   default:
      assert(0);
      return;
   }

   /* Fetch the generated program. */
   program = brw_get_program(&c.func, &program_size);

   /* Upload it and remember where it landed. */
   brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG],
                                              &c.key,
                                              sizeof(c.key),
                                              program,
                                              program_size,
                                              &c.prog_data,
                                              &brw->sf.prog_data );
}
コード例 #9
0
ファイル: brw_sf.c プロジェクト: ashmew2/kolibriosSVN
/**
 * Build the SF program selected by \p key and store it in the program cache.
 *
 * Code is generated into a temporary ralloc context, uploaded under
 * BRW_CACHE_SF_PROG (filling brw->sf.prog_offset and brw->sf.prog_data), and
 * the temporary context is freed afterwards.
 *
 * \param brw  context supplying device info, the geometry-output VUE map and
 *             the program cache
 * \param key  SF program key; copied into the compile state
 */
static void compile_sf_prog( struct brw_context *brw,
                             struct brw_sf_prog_key *key )
{
    struct brw_sf_compile c;
    const GLuint *program;
    GLuint program_size;
    void *mem_ctx = ralloc_context(NULL);

    memset(&c, 0, sizeof(c));

    /* Start code generation inside the temporary context. */
    brw_init_codegen(brw->intelScreen->devinfo, &c.func, mem_ctx);

    c.key = *key;
    c.vue_map = brw->vue_map_geom_out;
    c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode);

    if (c.key.do_point_coord) {
        /*
         * gl_PointCoord is a fragment shader builtin rather than a vertex
         * shader one, so the VUE map produced for the VS stage has no slot
         * for it.  Append one by hand so the SF program generates the
         * interpolation coefficients the fragment shader needs.
         */
        c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
        c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
    }

    /* Two VUE slots share one register, hence the round-up division. */
    c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
    c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
    c.nr_setup_regs = c.nr_attr_regs;

    c.prog_data.urb_read_length = c.nr_attr_regs;
    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;

    /* Emit the setup code matching the requested primitive. */
    switch (key->primitive) {
    case SF_TRIANGLES:
        c.nr_verts = 3;
        brw_emit_tri_setup( &c, true );
        break;
    case SF_LINES:
        c.nr_verts = 2;
        brw_emit_line_setup( &c, true );
        break;
    case SF_POINTS:
        c.nr_verts = 1;
        if (!key->do_point_sprite)
            brw_emit_point_setup( &c, true );
        else
            brw_emit_point_sprite_setup( &c, true );
        break;
    case SF_UNFILLED_TRIS:
        c.nr_verts = 3;
        brw_emit_anyprim_setup( &c );
        break;
    default:
        unreachable("not reached");
    }

    /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
     * source). Compacting would be difficult, so no compaction pass is run
     * here.
     */

    /* Fetch the generated program. */
    program = brw_get_program(&c.func, &program_size);

    if (unlikely(INTEL_DEBUG & DEBUG_SF)) {
        fprintf(stderr, "sf:\n");
        brw_disassemble(brw->intelScreen->devinfo,
                        c.func.store, 0, program_size, stderr);
        fprintf(stderr, "\n");
    }

    brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG,
                     &c.key, sizeof(c.key),
                     program, program_size,
                     &c.prog_data, sizeof(c.prog_data),
                     &brw->sf.prog_offset, &brw->sf.prog_data);
    ralloc_free(mem_ctx);
}
コード例 #10
0
ファイル: brw_wm.c プロジェクト: astrofimov/vgallium
/**
 * Compile a fragment program for \p key and upload it to the WM cache.
 *
 * \param brw  context owning the reusable compile structure and the caches
 * \param fp   fragment program being compiled
 * \param key  WM program key; copied into the compile state
 */
static void do_wm_prog( struct brw_context *brw,
                        struct brw_fragment_program *fp,
                        struct brw_wm_prog_key *key)
{
   struct brw_wm_compile *c;
   const GLuint *program;
   GLuint program_size;

   /* The compile structure is allocated zeroed on first use and afterwards
    * recycled, cleared, for every later compile.
    * NOTE(review): the calloc() result is not checked before it is used.
    */
   if (brw->wm.compile_data == NULL)
      brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
   else
      memset(brw->wm.compile_data, 0, sizeof(*brw->wm.compile_data));
   c = brw->wm.compile_data;

   memcpy(&c->key, key, sizeof(*key));
   c->fp = fp;
   c->env_param = brw->intel.ctx.FragmentProgram.Parameters;

   /* Augment the fragment program with instructions for pre- and
    * post-fragment-program tasks such as interpolation and fogging.
    */
   brw_wm_pass_fp(c);

   /* Translate to the intermediate representation and build register
    * usage chains.
    */
   brw_wm_pass0(c);

   /* Dead code removal. */
   brw_wm_pass1(c);

   /* "Hal" optimization pass. */
   brw_wm_pass_hal (c);

   /* Register allocation is limited to half of the maximum GRF count. */
   c->grf_limit = BRW_WM_MAX_GRF/2;

   /* Gen4 code emission starts from here. */
   brw_init_compile(&c->func);

   brw_wm_pass2(c);

   c->prog_data.total_grf = c->max_wm_grf;

   /* When scratch space was used, 0x40 is added on top of the last offset. */
   if (!c->last_scratch)
      c->prog_data.total_scratch = 0;
   else
      c->prog_data.total_scratch = c->last_scratch + 0x40;

   /* Emit GEN4 code. */
   brw_wm_emit(c);

   /* Fetch the generated program. */
   program = brw_get_program(&c->func, &program_size);

   /* Upload it and remember where it landed. */
   brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG],
                                              &c->key,
                                              sizeof(c->key),
                                              program,
                                              program_size,
                                              &c->prog_data,
                                              &brw->wm.prog_data );
}
コード例 #11
0
ファイル: brw_clip.c プロジェクト: CSRedRat/mesa-1
/**
 * Build the clip program selected by \p key and store it in the program
 * cache.
 *
 * Code is generated into a temporary ralloc context, uploaded under
 * BRW_CLIP_PROG (filling brw->clip.prog_offset and brw->clip.prog_data), and
 * the temporary context is released before returning on every path.
 *
 * \param brw  context supplying the geometry-output VUE map, the hardware
 *             generation used for disassembly, and the program cache
 * \param key  clip program key; copied into the compile state
 */
static void compile_clip_prog( struct brw_context *brw,
                             struct brw_clip_prog_key *key )
{
   struct brw_clip_compile c;
   const GLuint *program;
   void *mem_ctx;
   GLuint program_size;
   GLuint i;

   memset(&c, 0, sizeof(c));

   mem_ctx = ralloc_context(NULL);

   /* Begin the compilation:
    */
   brw_init_compile(brw, &c.func, mem_ctx);

   c.func.single_program_flow = 1;

   c.key = *key;
   c.vue_map = brw->vue_map_geom_out;

   c.has_flat_shading =
      brw_any_flat_varyings(&key->interpolation_mode);
   c.has_noperspective_shading =
      brw_any_noperspective_varyings(&key->interpolation_mode);

   /* nr_regs is the number of registers filled by reading data from the VUE.
    * This program accesses the entire VUE, so nr_regs needs to be the size of
    * the VUE (measured in pairs, since two slots are stored in each
    * register).
    */
   c.nr_regs = (c.vue_map.num_slots + 1)/2;

   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.
    */
   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);


   /* Would ideally have the option of producing a program which could
    * do all three:
    */
   switch (key->primitive) {
   case GL_TRIANGLES:
      if (key->do_unfilled)
         brw_emit_unfilled_clip( &c );
      else
         brw_emit_tri_clip( &c );
      break;
   case GL_LINES:
      brw_emit_line_clip( &c );
      break;
   case GL_POINTS:
      brw_emit_point_clip( &c );
      break;
   default:
      assert(0);
      /* With assertions compiled out we still reach this return, so the
       * compile context must be released here as well.
       */
      ralloc_free(mem_ctx);
      return;
   }



   /* get the program
    */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) {
      printf("clip:\n");
      for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
         brw_disasm(stdout, &((struct brw_instruction *)program)[i],
                    brw->gen);
      printf("\n");
   }

   brw_upload_cache(&brw->cache,
                    BRW_CLIP_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, sizeof(c.prog_data),
                    &brw->clip.prog_offset, &brw->clip.prog_data);
   ralloc_free(mem_ctx);
}
コード例 #12
0
ファイル: brw_vs.c プロジェクト: GunioRobot/mesa-7.10.2-PS3
/**
 * Compile a vertex program for \p key and upload it to the program cache.
 *
 * The previously bound brw->vs.prog_bo is unreferenced and replaced by the
 * buffer returned from brw_upload_cache_with_auxdata().
 *
 * \param brw  context owning the program cache
 * \param vp   vertex program being compiled
 * \param key  program key; copied into the compile state
 */
static void do_vs_prog( struct brw_context *brw,
                        struct brw_vertex_program *vp,
                        struct brw_vs_prog_key *key )
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct brw_vs_compile c;
   const GLuint *program;
   GLuint program_size;
   int aux_size;
   int unit;

   memset(&c, 0, sizeof(c));
   memcpy(&c.key, key, sizeof(*key));

   brw_init_compile(brw, &c.func);
   c.vp = vp;

   /* Start from what the program itself reads and writes. */
   c.prog_data.inputs_read = vp->program.Base.InputsRead;
   c.prog_data.outputs_written = vp->program.Base.OutputsWritten;

   /* Copying the edge flag adds one input and one output. */
   if (c.key.copy_edgeflag) {
      c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
      c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE);
   }

   /* Reserve dummy VUE slots for the SF to write replaced point sprite
    * coordinates into.  The slots cost URB space, but without them the SF
    * would not see aligned pairs of input and output coordinates, which
    * would be painful to handle.
    */
   for (unit = 0; unit < 8; unit++) {
      if (!(c.key.point_coord_replace & (1 << unit)))
         continue;
      c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + unit);
   }

   /* Disabled debug dump of the program. */
   if (0) {
      _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG,
                               GL_TRUE);
   }

   /* Emit GEN4 code. */
   brw_vs_emit(&c);

   /* Fetch the generated program. */
   program = brw_get_program(&c.func, &program_size);

   /* The upload reads prog_data and the constant_map in one go, which only
    * works while the two are laid out back to back.  A compile-time assert
    * would be nicer than these run-time ones.
    */
   assert(c.constant_map == (int8_t *)&c.prog_data +
          sizeof(c.prog_data));
   assert(ctx->Const.VertexProgram.MaxNativeParameters ==
          ARRAY_SIZE(c.constant_map));
   (void) ctx;

   /* Aux data = prog_data followed by the used part of constant_map. */
   aux_size = sizeof(c.prog_data);
   aux_size += c.vp->program.Base.Parameters->NumParameters;

   /* Swap the bound program buffer for the freshly uploaded one. */
   drm_intel_bo_unreference(brw->vs.prog_bo);
   brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
                                                   &c.key, sizeof(c.key),
                                                   NULL, 0,
                                                   program, program_size,
                                                   &c.prog_data,
                                                   aux_size,
                                                   &brw->vs.prog_data);
}
コード例 #13
0
ファイル: brw_wm.c プロジェクト: CPFDSoftware-Tony/gmv
/**
 * Single entry point for turning a fragment shader into GPU code.
 *
 * Shaders with flow control would use a separate code generator, but that
 * path is not implemented here and terminates the process; all other
 * shaders go through the linear emitter.
 *
 * \param brw     context owning the reusable compile structure and cache
 * \param fp      fragment shader being compiled
 * \param key     WM program key; copied into the compile state
 * \param bo_out  passed through to brw_upload_cache()
 * \return PIPE_ERROR_OUT_OF_MEMORY if the compile structure cannot be
 *         allocated, the error from brw_get_program() or brw_upload_cache()
 *         if either fails, PIPE_OK otherwise
 */
static enum pipe_error do_wm_prog( struct brw_context *brw,
                                   struct brw_fragment_shader *fp,
                                   struct brw_wm_prog_key *key,
                                   struct brw_winsys_buffer **bo_out)
{
   struct brw_wm_compile *c;
   const GLuint *program;
   GLuint program_size;
   enum pipe_error ret;

   /* The compile structure is allocated on first use and kept around. */
   c = brw->wm.compile_data;
   if (c == NULL) {
      c = MALLOC(sizeof(*brw->wm.compile_data));
      if (!c)
         return PIPE_ERROR_OUT_OF_MEMORY;
      brw->wm.compile_data = c;
   }

   memset(c, 0, sizeof *c);

   c->key = *key;
   c->fp = fp;
   c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/

   brw_init_compile(brw, &c->func);

   /*
    * Shaders without flow control take the linear path at dispatch width
    * 16; those with it would need the branching emitter at width 8.
    */
   if (!fp->has_flow_control) {
      c->dispatch_width = 16;
      brw_wm_linear_shader_emit(brw, c);
   }
   else {
      c->dispatch_width = 8;
      /* XXX: GLSL support
       */
      exit(1);
      /* brw_wm_branching_shader_emit(brw, c); */
   }

   if (BRW_DEBUG & DEBUG_WM)
      debug_printf("\n");

   /* Fetch the generated program. */
   ret = brw_get_program(&c->func, &program, &program_size);
   if (ret)
      return ret;

   ret = brw_upload_cache( &brw->cache, BRW_WM_PROG,
                           &c->key, sizeof(c->key),
                           NULL, 0,
                           program, program_size,
                           &c->prog_data,
                           &brw->wm.prog_data,
                           bo_out );

   return ret ? ret : PIPE_OK;
}
コード例 #14
0
ファイル: brw_gs.c プロジェクト: DirectFB/mesa
/**
 * Build the fixed-function GS program selected by \p key and store it in
 * the program cache.
 *
 * Code is generated into a temporary ralloc context, uploaded under
 * BRW_FF_GS_PROG (filling brw->ff_gs.prog_offset and brw->ff_gs.prog_data),
 * and the temporary context is released before returning on every path.
 *
 * \param brw  context supplying the hardware generation, the VS VUE map and
 *             the program cache
 * \param key  fixed-function GS program key; copied into the compile state
 */
static void compile_ff_gs_prog(struct brw_context *brw,
                               struct brw_ff_gs_prog_key *key)
{
   struct brw_ff_gs_compile c;
   const GLuint *program;
   void *mem_ctx;
   GLuint program_size;

   memset(&c, 0, sizeof(c));

   c.key = *key;
   c.vue_map = brw->vs.prog_data->base.vue_map;
   /* Two VUE slots share one register, hence the round-up division. */
   c.nr_regs = (c.vue_map.num_slots + 1)/2;

   mem_ctx = ralloc_context(NULL);

   /* Begin the compilation:
    */
   brw_init_compile(brw, &c.func, mem_ctx);

   c.func.single_program_flow = 1;

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.
    */
   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);

   if (brw->gen >= 6) {
      unsigned num_verts;
      bool check_edge_flag;
      /* On Sandybridge, we use the GS for implementing transform feedback
       * (called "Stream Out" in the PRM).
       */
      switch (key->primitive) {
      case _3DPRIM_POINTLIST:
         num_verts = 1;
         check_edge_flag = false;
         break;
      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         num_verts = 2;
         check_edge_flag = false;
         break;
      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
         num_verts = 3;
         check_edge_flag = false;
         break;
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         num_verts = 3;
         check_edge_flag = true;
         break;
      default:
         assert(!"Unexpected primitive type in Gen6 SOL program.");
         /* With assertions compiled out we still reach this return; free
          * the compile context just like the Gen4-5 default case does.
          */
         ralloc_free(mem_ctx);
         return;
      }
      gen6_sol_program(&c, key, num_verts, check_edge_flag);
   } else {
      /* On Gen4-5, we use the GS to decompose certain types of primitives.
       * Note that primitives which don't require a GS program have already
       * been weeded out by now.
       */
      switch (key->primitive) {
      case _3DPRIM_QUADLIST:
         brw_ff_gs_quads( &c, key );
         break;
      case _3DPRIM_QUADSTRIP:
         brw_ff_gs_quad_strip( &c, key );
         break;
      case _3DPRIM_LINELOOP:
         brw_ff_gs_lines( &c );
         break;
      default:
         ralloc_free(mem_ctx);
         return;
      }
   }

   /* get the program
    */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
      int i;

      printf("gs:\n");
      for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
         brw_disasm(stdout, &((struct brw_instruction *)program)[i],
                    brw->gen);
      printf("\n");
    }

   brw_upload_cache(&brw->cache, BRW_FF_GS_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, sizeof(c.prog_data),
                    &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data);
   ralloc_free(mem_ctx);
}
コード例 #15
0
ファイル: brw_clip.c プロジェクト: Echelon9/mesa
/**
 * Build the clip program selected by \p key and store it in the program
 * cache.
 *
 * Code is generated into a temporary ralloc context, compacted, uploaded
 * under BRW_CACHE_CLIP_PROG (filling brw->clip.prog_offset and
 * brw->clip.prog_data), and the temporary context is freed afterwards.
 *
 * \param brw  context supplying device info, the geometry-output VUE map and
 *             the program cache
 * \param key  clip program key; copied into the compile state
 */
static void compile_clip_prog( struct brw_context *brw,
                             struct brw_clip_prog_key *key )
{
   struct brw_clip_compile c;
   const GLuint *program;
   GLuint program_size;
   void *mem_ctx = ralloc_context(NULL);

   memset(&c, 0, sizeof(c));

   /* Start code generation inside the temporary context. */
   brw_init_codegen(&brw->screen->devinfo, &c.func, mem_ctx);
   c.func.single_program_flow = 1;

   c.key = *key;
   c.vue_map = brw->vue_map_geom_out;

   /* The program reads the whole VUE, so nr_regs is the VUE size counted in
    * registers; each register holds two slots, hence the round-up division.
    */
   c.nr_regs = (c.vue_map.num_slots + 1)/2;

   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.
    */
   brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE);

   /* One program per primitive type; ideally a single program would cover
    * all three.
    */
   switch (key->primitive) {
   case GL_POINTS:
      brw_emit_point_clip( &c );
      break;
   case GL_LINES:
      brw_emit_line_clip( &c );
      break;
   case GL_TRIANGLES:
      if (!key->do_unfilled)
         brw_emit_tri_clip( &c );
      else
         brw_emit_unfilled_clip( &c );
      break;
   default:
      unreachable("not reached");
   }

   brw_compact_instructions(&c.func, 0, 0, NULL);

   /* Fetch the generated program. */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) {
      fprintf(stderr, "clip:\n");
      brw_disassemble(&brw->screen->devinfo, c.func.store,
                      0, program_size, stderr);
      fprintf(stderr, "\n");
   }

   brw_upload_cache(&brw->cache,
                    BRW_CACHE_CLIP_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, sizeof(c.prog_data),
                    &brw->clip.prog_offset, &brw->clip.prog_data);
   ralloc_free(mem_ctx);
}
コード例 #16
0
ファイル: brw_gs.c プロジェクト: nikai3d/mesa
/**
 * Build the GS program selected by \p key and store it in the program cache.
 *
 * Does nothing on gen >= 6, or when the primitive needs no GS program.
 * Otherwise code is generated into a temporary ralloc context, uploaded
 * under BRW_GS_PROG (filling brw->gs.prog_offset and brw->gs.prog_data), and
 * the temporary context is freed before returning.
 *
 * \param brw  context supplying the hardware generation and program cache
 * \param key  GS program key; copied into the compile state
 */
static void compile_gs_prog( struct brw_context *brw,
                             struct brw_gs_prog_key *key )
{
   struct intel_context *intel = &brw->intel;
   struct brw_gs_compile c;
   const GLuint *program;
   void *mem_ctx;
   GLuint program_size;

   /* Gen6: VF has already converted into polygon, and LINELOOP is
    * converted to LINESTRIP at the beginning of the 3D pipeline.
    */
   if (intel->gen >= 6)
      return;

   memset(&c, 0, sizeof(c));

   c.key = *key;
   /* Need to locate the two positions present in vertex + header.
    * These are currently hardcoded:
    */
   c.nr_attrs = brw_count_bits(c.key.attrs);

   if (intel->gen >= 5)
       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
   else
       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */

   c.nr_bytes = c.nr_regs * REG_SIZE;

   /* A real context is required here: with a NULL parent, whatever
    * brw_init_compile() allocates is never released, because the
    * ralloc_free(mem_ctx) calls below would free nothing.
    */
   mem_ctx = ralloc_context(NULL);

   /* Begin the compilation:
    */
   brw_init_compile(brw, &c.func, mem_ctx);

   c.func.single_program_flow = 1;

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.
    */
   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);


   /* Note that primitives which don't require a GS program have
    * already been weeded out by this stage:
    */

   switch (key->primitive) {
   case GL_QUADS:
      brw_gs_quads( &c, key );
      break;
   case GL_QUAD_STRIP:
      brw_gs_quad_strip( &c, key );
      break;
   case GL_LINE_LOOP:
      brw_gs_lines( &c );
      break;
   default:
      ralloc_free(mem_ctx);
      return;
   }

   /* get the program
    */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
      int i;

      printf("gs:\n");
      for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
         brw_disasm(stdout, &((struct brw_instruction *)program)[i],
                    intel->gen);
      printf("\n");
    }

   brw_upload_cache(&brw->cache, BRW_GS_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, sizeof(c.prog_data),
                    &brw->gs.prog_offset, &brw->gs.prog_data);
   ralloc_free(mem_ctx);
}
コード例 #17
0
ファイル: brw_clip.c プロジェクト: AchironOS/chromium.src
/**
 * Build the clip program selected by \p key and upload it to the program
 * cache.
 *
 * The previously bound brw->clip.prog_bo is unreferenced and replaced by the
 * buffer returned from brw_upload_cache_with_auxdata().
 *
 * \param brw  context supplying the hardware generation and program cache
 * \param key  clip program key; copied into the compile state
 */
static void compile_clip_prog( struct brw_context *brw,
                             struct brw_clip_prog_key *key )
{
   struct intel_context *intel = &brw->intel;
   struct brw_clip_compile c;
   const GLuint *program;
   GLuint program_size;
   GLuint header_regs;
   GLuint delta;
   GLuint i;

   memset(&c, 0, sizeof(c));

   /* Start a fresh compile. */
   brw_init_compile(brw, &c.func);
   c.func.single_program_flow = 1;

   c.key = *key;

   /* Need to locate the two positions present in vertex + header.
    * These are currently hardcoded:
    */
   c.header_position_offset = ATTR_SIZE;

   /* Gen5 gets a three-register vertex header, everything else one. */
   header_regs = (intel->gen == 5) ? 3 : 1;

   /* Lay the enabled attributes out back to back after the header and
    * record, for each one, its byte offset and its packed index.
    */
   delta = header_regs * REG_SIZE;
   for (i = 0; i < VERT_RESULT_MAX; i++) {
      if (!(c.key.attrs & BITFIELD64_BIT(i)))
         continue;

      c.offset[i] = delta;
      delta += ATTR_SIZE;

      c.idx_to_attr[c.nr_attrs++] = i;
   }

   /* The vertex attributes start at a URB row-aligned offset after
    * the 8-20 dword vertex header, and continue for a URB row-aligned
    * length.  nr_regs determines the urb_read_length from the start
    * of the header to the end of the vertex data.
    */
   c.nr_regs = header_regs + (c.nr_attrs + 1) / 2;
   c.nr_bytes = c.nr_regs * REG_SIZE;

   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.
    */
   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);

   /* One program per primitive type; ideally a single program would cover
    * all three.
    */
   switch (key->primitive) {
   case GL_POINTS:
      brw_emit_point_clip( &c );
      break;
   case GL_LINES:
      brw_emit_line_clip( &c );
      break;
   case GL_TRIANGLES:
      if (!key->do_unfilled)
         brw_emit_tri_clip( &c );
      else
         brw_emit_unfilled_clip( &c );
      break;
   default:
      assert(0);
      return;
   }

   /* Fetch the generated program. */
   program = brw_get_program(&c.func, &program_size);

   if (INTEL_DEBUG & DEBUG_CLIP) {
      printf("clip:\n");
      for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
         brw_disasm(stdout, &((struct brw_instruction *)program)[i],
                    intel->gen);
      printf("\n");
   }

   /* Swap the bound program buffer for the freshly uploaded one. */
   drm_intel_bo_unreference(brw->clip.prog_bo);
   brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache,
                                                     BRW_CLIP_PROG,
                                                     &c.key, sizeof(c.key),
                                                     NULL, 0,
                                                     program, program_size,
                                                     &c.prog_data,
                                                     sizeof(c.prog_data),
                                                     &brw->clip.prog_data);
}