示例#1
0
/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism; instead the freshly built buffer is compared against
 * the previously uploaded copy (brw->curbe.last_buf) and is only written
 * to the GS pool again when size or contents differ.  The CONST_BUFFER
 * packet itself is emitted on every call.
 *
 * brw->curbe.total_size and the wm/clip/vs start values are used here in
 * units of 16 floats; the buffer is laid out as:
 *   - WM (fragment program) parameters at wm_start,
 *   - the six fixed planes followed by the enabled user clip planes at
 *     clip_start,
 *   - vertex program parameters (four floats each) at vs_start.
 *
 * Returns nothing.  On allocation failure (heap or GS pool) a message is
 * printed, no packet is emitted and the cached copy is discarded so that
 * the next call uploads again.
 */
static void upload_constant_buffer(struct brw_context *brw)
{
   GLcontext *ctx = &brw->intel.ctx;
   struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
   struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
   struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
   GLuint sz = brw->curbe.total_size;
   GLuint bufsz = sz * 16 * sizeof(GLfloat);
   GLfloat *buf;
   GLuint i;

   /* Update our own dependency flags.  This works because this
    * function will also be called whenever fp or vp changes.
    */
   brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
   brw->curbe.tracked_state.dirty.mesa |= vp->param_state;
   brw->curbe.tracked_state.dirty.mesa |= fp->param_state;

   if (sz == 0) {
      struct brw_constant_buffer cb;

      /* Zero the whole packet first, as the non-empty path does, so no
       * indeterminate bits are copied into the batch.
       */
      memset(&cb, 0, sizeof(cb));

      cb.header.opcode = CMD_CONST_BUFFER;
      cb.header.length = sizeof(cb)/4 - 2;
      cb.header.valid = 0;
      cb.bits0.buffer_length = 0;
      cb.bits0.buffer_address = 0;
      BRW_BATCH_STRUCT(brw, &cb);

      /* Nothing is uploaded any more: forget the cached copy. */
      free(brw->curbe.last_buf);
      brw->curbe.last_buf = NULL;
      brw->curbe.last_bufsz = 0;

      return;
   }

   /* Zero-filled scratch copy of the whole constant buffer. */
   buf = calloc(1, bufsz);
   if (!buf) {
      _mesa_printf("out of memory for curbe\n");
      assert(0);
      return;
   }

   if (brw->curbe.wm_size) {
      GLuint offset = brw->curbe.wm_start * 16;

      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);

      /* One float per WM parameter, read through the param[] pointers. */
      for (i = 0; i < brw->wm.prog_data->nr_params; i++)
         buf[offset + i] = brw->wm.prog_data->param[i][0];
   }


   /* The clipplanes are actually delivered to both CLIP and VS units.
    * VS uses them to calculate the outcode bitmasks.
    */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint slot = 0;
      GLuint j, k;

      /* If any planes are going this way, send them all this way:
       */
      for (slot = 0; slot < 6; slot++) {
         for (k = 0; k < 4; k++)
            buf[offset + slot * 4 + k] = fixed_plane[slot][k];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space.  Enabled user planes are packed contiguously after
       * the six fixed planes.
       */
      assert(MAX_CLIP_PLANES == 6);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
         if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) {
            for (k = 0; k < 4; k++)
               buf[offset + slot * 4 + k] =
                  brw->attribs.Transform->_ClipUserPlane[j][k];
            slot++;
         }
      }
   }


   if (brw->curbe.vs_size) {
      GLuint offset = brw->curbe.vs_start * 16;
      GLuint nr = vp->program.Base.Parameters->NumParameters;
      GLuint k;

      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);

      /* Four floats per vertex program parameter. */
      for (i = 0; i < nr; i++) {
         for (k = 0; k < 4; k++)
            buf[offset + i * 4 + k] =
               vp->program.Base.Parameters->ParameterValues[i][k];
      }
   }

   /* Debug dump, compiled in but disabled. */
   if (0) {
      for (i = 0; i < sz*16; i+=4)
         _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

      _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
                   brw->curbe.last_buf, buf,
                   bufsz, brw->curbe.last_bufsz,
                   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.last_buf &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* Identical to what is already uploaded: keep the old copy and
       * offset, but still fall through to emit the packet below.
       */
      free(buf);
   }
   else {
      /* Reserve pool space before touching the cache, so that a failed
       * allocation cannot leave last_buf describing data that was
       * never uploaded.
       */
      if (!brw_pool_alloc(pool,
                          bufsz,
                          6,
                          &brw->curbe.gs_offset)) {
         _mesa_printf("out of GS memory for curbe\n");

         /* Whether gs_offset is still meaningful after a failed
          * allocation is not visible here, so conservatively drop the
          * cached copy as well; the next call will upload afresh.
          */
         free(buf);
         free(brw->curbe.last_buf);
         brw->curbe.last_buf = NULL;
         brw->curbe.last_bufsz = 0;

         assert(0);
         return;
      }

      /* The new buffer becomes the cached copy (ownership moves to
       * brw->curbe.last_buf).
       */
      free(brw->curbe.last_buf);
      brw->curbe.last_buf = buf;
      brw->curbe.last_bufsz = bufsz;

      /* Copy data to the buffer:
       */
      bmBufferSubDataAUB(&brw->intel,
                         pool->buffer,
                         brw->curbe.gs_offset,
                         bufsz,
                         buf,
                         DW_CONSTANT_BUFFER,
                         0);
   }

   /* TODO: only emit the constant_buffer packet when necessary, ie:
      - contents have changed
      - offset has changed
      - hw requirements due to other packets emitted.
   */
   {
      struct brw_constant_buffer cb;

      memset(&cb, 0, sizeof(cb));

      cb.header.opcode = CMD_CONST_BUFFER;
      cb.header.length = sizeof(cb)/4 - 2;
      cb.header.valid = 1;
      cb.bits0.buffer_length = sz - 1;
      cb.bits0.buffer_address = brw->curbe.gs_offset >> 6;

      /* Because this provokes an action (ie copy the constants into the
       * URB), it shouldn't be shortcircuited if identical to the
       * previous time - because eg. the urb destination may have
       * changed, or the urb contents different to last time.
       *
       * Note that the data referred to is actually copied internally,
       * not just used in place according to passed pointer.
       *
       * It appears that the CS unit takes care of using each available
       * URB entry (Const URB Entry == CURBE) in turn, and issuing
       * flushes as necessary when doublebuffering of CURBEs isn't
       * possible.
       */
      BRW_BATCH_STRUCT(brw, &cb);
   }
}
示例#2
0
/* Number of bytes, at most `nbytes`, that may be written starting at
 * float index `float_offset` of a buffer that is `bufsz` bytes long
 * without running past its end.  Returns 0 when the offset is already
 * at or beyond the end.
 */
static size_t curbe_clamp_bytes(size_t bufsz, size_t float_offset, size_t nbytes)
{
   size_t start = float_offset * sizeof(float);
   size_t room = (start < bufsz) ? bufsz - start : 0;

   return (nbytes < room) ? nbytes : room;
}

/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism; instead the freshly built buffer is compared against
 * the previously uploaded copy (brw->curbe.last_buf) and is only written
 * to the GS pool again when size or contents differ.  The CONST_BUFFER
 * packet itself is emitted on every call.
 *
 * The buffer holds, at their respective start offsets: the WM internal
 * constants, the six fixed planes followed by the user clip planes, and
 * the vertex shader constants followed by its immediates.
 *
 * NOTE(review): the section offsets are scaled by 16 floats, yet the
 * buffer is sized as total_size * sizeof(float).  Confirm the units of
 * brw->curbe.total_size against the code that computes the offsets.
 *
 * Returns nothing.  On allocation failure (heap or GS pool) a message is
 * printed, no packet is emitted and the cached copy is discarded so that
 * the next call uploads again.
 */
static void upload_constant_buffer(struct brw_context *brw)
{
   struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
   unsigned sz = brw->curbe.total_size;
   unsigned bufsz = sz * sizeof(float);
   float *buf;
   unsigned i;


   if (sz == 0) {
      struct brw_constant_buffer cb;

      /* Zero the whole packet first, as the non-empty path does, so no
       * indeterminate bits are copied into the batch.
       */
      memset(&cb, 0, sizeof(cb));

      cb.header.opcode = CMD_CONST_BUFFER;
      cb.header.length = sizeof(cb)/4 - 2;
      cb.header.valid = 0;
      cb.bits0.buffer_length = 0;
      cb.bits0.buffer_address = 0;
      BRW_BATCH_STRUCT(brw, &cb);

      /* Nothing is uploaded any more: forget the cached copy. */
      free(brw->curbe.last_buf);
      brw->curbe.last_buf = NULL;
      brw->curbe.last_bufsz = 0;

      return;
   }

   /* Zero-filled scratch copy of the whole constant buffer. */
   buf = calloc(1, bufsz);
   if (!buf) {
      debug_printf("out of memory for curbe\n");
      assert(0);
      return;
   }

   if (brw->curbe.wm_size) {
      unsigned offset = brw->curbe.wm_start * 16;

      /* First the constant buffer constants:
       * (none are copied here at present)
       */

      /* Then any internally generated constants:
       */
      for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++)
         buf[offset + i] = brw->wm.prog_data->internal_const[i];

      assert(brw->wm.prog_data->max_const ==
             brw->wm.prog_data->nr_internal_consts);
   }


   /* The clipplanes are actually delivered to both CLIP and VS units.
    * VS uses them to calculate the outcode bitmasks.
    */
   if (brw->curbe.clip_size) {
      unsigned offset = brw->curbe.clip_start * 16;
      unsigned slot;
      unsigned j, k;

      /* If any planes are going this way, send them all this way:
       */
      for (slot = 0; slot < 6; slot++) {
         for (k = 0; k < 4; k++)
            buf[offset + slot * 4 + k] = fixed_plane[slot][k];
      }

      /* Clip planes: BRW_NEW_CLIP.  User planes are packed directly
       * after the six fixed planes.
       */
      for (j = 0; j < brw->attribs.Clip.nr; j++) {
         for (k = 0; k < 4; k++)
            buf[offset + slot * 4 + k] = brw->attribs.Clip.ucp[j][k];
         slot++;
      }
   }


   if (brw->curbe.vs_size) {
      /* `offset` is an index into buf, i.e. counted in floats. */
      unsigned offset = brw->curbe.vs_start * 16;
      const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0];
      struct pipe_winsys *ws = brw->pipe.winsys;

      /* FIXME: buffer size is num_consts + num_immediates */
      if (brw->vs.prog_data->num_consts) {
         /* map the vertex constant buffer and copy to curbe: */
         void *data = ws->buffer_map(ws, cbuffer->buffer, 0);

         if (data) {
            /* FIXME (from the original author): cbuffer->buffer->size
             * currently represents the size of consts + immediates, so
             * copying it and then the immediates can exceed the curbe.
             * The copy is therefore clamped to the space left in buf.
             */
            size_t len = curbe_clamp_bytes(bufsz, offset,
                                           cbuffer->buffer->size);

            if (len)
               memcpy(&buf[offset], data, len);
            ws->buffer_unmap(ws, cbuffer->buffer);
         }

         /* buffer->size is passed to memcpy as a byte count, while
          * offset counts floats: convert before advancing.
          */
         offset += cbuffer->buffer->size / sizeof(float);
      }

      /* immediates: four floats each, placed after the constants */
      if (brw->vs.prog_data->num_imm) {
         size_t len = curbe_clamp_bytes(bufsz, offset,
                                        brw->vs.prog_data->num_imm * 4 * sizeof(float));

         if (len)
            memcpy(&buf[offset], brw->vs.prog_data->imm_buf, len);
      }
   }

   /* Debug dump; note that it is unconditionally enabled here. */
   if (1) {
      for (i = 0; i < sz; i+=4)
         debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

      debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
                   brw->curbe.last_buf, buf,
                   bufsz, brw->curbe.last_bufsz,
                   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.last_buf &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* Identical to what is already uploaded: keep the old copy and
       * offset, but still fall through to emit the packet below.
       */
      free(buf);
   }
   else {
      /* Reserve pool space before touching the cache, so that a failed
       * allocation cannot leave last_buf describing data that was
       * never uploaded.
       */
      if (!brw_pool_alloc(pool,
                          bufsz,
                          1 << 6,
                          &brw->curbe.gs_offset)) {
         debug_printf("out of GS memory for curbe\n");

         /* Whether gs_offset is still meaningful after a failed
          * allocation is not visible here, so conservatively drop the
          * cached copy as well; the next call will upload afresh.
          */
         free(buf);
         free(brw->curbe.last_buf);
         brw->curbe.last_buf = NULL;
         brw->curbe.last_bufsz = 0;

         assert(0);
         return;
      }

      /* The new buffer becomes the cached copy (ownership moves to
       * brw->curbe.last_buf).
       */
      free(brw->curbe.last_buf);
      brw->curbe.last_buf = buf;
      brw->curbe.last_bufsz = bufsz;

      /* Copy data to the buffer:
       */
      brw->winsys->buffer_subdata_typed(brw->winsys,
                                        pool->buffer,
                                        brw->curbe.gs_offset,
                                        bufsz,
                                        buf,
                                        BRW_CONSTANT_BUFFER );
   }

   /* TODO: only emit the constant_buffer packet when necessary, ie:
      - contents have changed
      - offset has changed
      - hw requirements due to other packets emitted.
   */
   {
      struct brw_constant_buffer cb;

      memset(&cb, 0, sizeof(cb));

      cb.header.opcode = CMD_CONST_BUFFER;
      cb.header.length = sizeof(cb)/4 - 2;
      cb.header.valid = 1;
      cb.bits0.buffer_length = sz - 1;
      cb.bits0.buffer_address = brw->curbe.gs_offset >> 6;

      /* Because this provokes an action (ie copy the constants into the
       * URB), it shouldn't be shortcircuited if identical to the
       * previous time - because eg. the urb destination may have
       * changed, or the urb contents different to last time.
       *
       * Note that the data referred to is actually copied internally,
       * not just used in place according to passed pointer.
       *
       * It appears that the CS unit takes care of using each available
       * URB entry (Const URB Entry == CURBE) in turn, and issuing
       * flushes as necessary when doublebuffering of CURBEs isn't
       * possible.
       */
      BRW_BATCH_STRUCT(brw, &cb);
   }
}