コード例 #1
0
/**
 * Uploads the pull constants of the current vertex shader, if it has any.
 *
 * On every call with a non-zero pull-parameter count a new buffer object is
 * allocated, filled with one 4-byte value per pull parameter, and handed to
 * create_constant_surface() for the VS constant-buffer binding slot.  When
 * the compiled program has no pull parameters, a previously uploaded buffer
 * (if any) is released and its surface offset is reset to 0.
 *
 * Programs without pull parameters get their constants through the CURBEs
 * via the brw_constant_buffer state atom.
 */
static void
brw_upload_vs_pull_constants(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   /* BRW_NEW_VERTEX_PROGRAM */
   struct brw_vertex_program *vp =
      (struct brw_vertex_program *) brw->vertex_program;
   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
   char *dst;
   int param;

   if (vp->program.IsNVProgram) {
      _mesa_load_tracked_matrices(ctx);
   }

   /* Refreshes the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);

   /* CACHE_NEW_VS_PROG */
   if (brw->vs.prog_data->nr_pull_params == 0) {
      /* Nothing to pull: drop a stale buffer and flag the change. */
      if (brw->vs.const_bo != NULL) {
         drm_intel_bo_unreference(brw->vs.const_bo);
         brw->vs.const_bo = NULL;
         brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
         brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
      }
      return;
   }

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo_unreference(brw->vs.const_bo);
   brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
                                         brw->vs.prog_data->nr_pull_params * 4,
                                         64);

   /* Each pull parameter is copied as 4 raw bytes into consecutive slots. */
   drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
   dst = brw->vs.const_bo->virtual;
   for (param = 0; param < brw->vs.prog_data->nr_pull_params; param++) {
      memcpy(dst + param * 4, brw->vs.prog_data->pull_param[param], 4);
   }

   /* Disabled debug dump of the uploaded buffer. */
   if (0) {
      for (param = 0; param < params->NumParameters; param++) {
         float *row = (float *) brw->vs.const_bo->virtual + param * 4;

         printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
                param, row[0], row[1], row[2], row[3]);
      }
   }

   drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);

   /* NOTE(review): the surface is sized by params->NumParameters while the
    * buffer holds nr_pull_params 4-byte values -- confirm the two agree.
    */
   intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
                                       params->NumParameters,
                                       &brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER]);

   brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
コード例 #2
0
/**
 * Write parameter array for the given vertex program into dst.
 *
 * For every constant the compiled program references, four floats are
 * written to dst: external constants come from the context's NV vertex
 * program parameters or from the program's own parameter list, immediates
 * come from the constant itself.  A constant whose source cannot be
 * resolved (an unhandled constant type, or an ARB program without a
 * parameter list) is written as (0, 0, 0, 0) rather than dereferencing a
 * null pointer.
 *
 * \param ctx  GL context; state-derived parameters are refreshed first.
 * \param vp   vertex program whose constants are uploaded.
 * \param dst  output array; must hold 4 * vp->code.constants.Count floats.
 * \return the total number of components written.
 */
static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
{
	/* Used whenever no real source vector is available for a constant. */
	static const float zero_vec[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
	int i;

	if (vp->Base->IsNVProgram) {
		_mesa_load_tracked_matrices(ctx);
	} else {
		if (vp->Base->Base.Parameters) {
			_mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
		}
	}

	for(i = 0; i < vp->code.constants.Count; ++i) {
		const float * src = zero_vec;
		const struct rc_constant * constant = &vp->code.constants.Constants[i];

		switch(constant->Type) {
		case RC_CONSTANT_EXTERNAL:
			if (vp->Base->IsNVProgram) {
				src = ctx->VertexProgram.Parameters[constant->u.External];
			} else if (vp->Base->Base.Parameters) {
				/* Parameters may be NULL (see the check above). */
				src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
			}
			break;

		case RC_CONSTANT_IMMEDIATE:
			src = constant->u.Immediate;
			break;

		default:
			/* Unhandled constant type: keep the zero vector. */
			break;
		}

		dst[4*i] = src[0];
		dst[4*i + 1] = src[1];
		dst[4*i + 2] = src[2];
		dst[4*i + 3] = src[3];
	}

	return 4 * vp->code.constants.Count;
}
コード例 #3
0
ファイル: t_vb_program.c プロジェクト: beligit/psx4m
/**
 * Pipeline stage callback that executes the current vertex program on
 * every vertex of the TNL vertex buffer.
 *
 * For each vertex the program's input attributes are loaded into a
 * gl_program_machine, the program is executed, and the written outputs are
 * copied into the stage's result arrays.  Afterwards the vertex buffer's
 * pointers are redirected to those result arrays and the clip test (with
 * optional NDC computation) is run.
 *
 * All bit tests use unsigned constants (1u << n): shifting a signed 1 into
 * bit 31 is undefined behavior in C.
 *
 * \param ctx    GL context.
 * \param stage  pipeline stage holding the vp_stage_data storage.
 * \return GL_TRUE to continue the pipeline (also when no vertex program is
 *         current); GL_FALSE when the clip test's and-mask is non-zero,
 *         i.e. all vertices are outside the frustum.
 */
static GLboolean
run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vp_stage_data *store = VP_STAGE_DATA(stage);
   struct vertex_buffer *VB = &tnl->vb;
   struct gl_vertex_program *program = ctx->VertexProgram._Current;
   struct gl_program_machine machine;
   GLuint outputs[VERT_RESULT_MAX], numOutputs;
   GLuint i, j;

   if (!program)
      return GL_TRUE;

   if (program->IsNVProgram) {
      _mesa_load_tracked_matrices(ctx);
   }
   else {
      /* ARB program or vertex shader */
      _mesa_load_state_parameters(ctx, program->Base.Parameters);
   }

   /* Build the list of written outputs once, so the per-vertex copy loop
    * below only touches results the program actually produces.
    */
   numOutputs = 0;
   for (i = 0; i < VERT_RESULT_MAX; i++) {
      if (program->Base.OutputsWritten & (1u << i)) {
         outputs[numOutputs++] = i;
      }
   }

   for (i = 0; i < VB->Count; i++) {
      GLuint attr;

      init_machine(ctx, &machine);

#if 0
      printf("Input  %d: %f, %f, %f, %f\n", i,
             VB->AttribPtr[0]->data[i][0],
             VB->AttribPtr[0]->data[i][1],
             VB->AttribPtr[0]->data[i][2],
             VB->AttribPtr[0]->data[i][3]);
      printf("   color: %f, %f, %f, %f\n",
             VB->AttribPtr[3]->data[i][0],
             VB->AttribPtr[3]->data[i][1],
             VB->AttribPtr[3]->data[i][2],
             VB->AttribPtr[3]->data[i][3]);
      printf("  normal: %f, %f, %f, %f\n",
             VB->AttribPtr[2]->data[i][0],
             VB->AttribPtr[2]->data[i][1],
             VB->AttribPtr[2]->data[i][2],
             VB->AttribPtr[2]->data[i][3]);
#endif

      /* the vertex array case */
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
         if (program->Base.InputsRead & (1u << attr)) {
            /* Attribute arrays are addressed by byte stride. */
            const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
            const GLuint size = VB->AttribPtr[attr]->size;
            const GLuint stride = VB->AttribPtr[attr]->stride;
            const GLfloat *data = (GLfloat *) (ptr + stride * i);
            COPY_CLEAN_4V(machine.VertAttribs[attr], size, data);
         }
      }

      /* execute the program */
      _mesa_execute_program(ctx, &program->Base, &machine);

      /* copy the output registers into the VB->attribs arrays */
      for (j = 0; j < numOutputs; j++) {
         const GLuint attr = outputs[j];
         COPY_4V(store->results[attr].data[i], machine.Outputs[attr]);
      }
#if 0
      printf("HPOS: %f %f %f %f\n",
             machine.Outputs[0][0],
             machine.Outputs[0][1],
             machine.Outputs[0][2],
             machine.Outputs[0][3]);
#endif
   }

   /* Fixup fog and point size results if needed */
   if (program->IsNVProgram) {
      if (ctx->Fog.Enabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_FOGC)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_FOGC].data[i][0] = 1.0;
         }
      }

      if (ctx->VertexProgram.PointSizeEnabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_PSIZ)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_PSIZ].data[i][0] = ctx->Point.Size;
         }
      }
   }

   /* Setup the VB pointers so that the next pipeline stages get
    * their data from the right place (the program output arrays).
    */
   VB->ClipPtr = &store->results[VERT_RESULT_HPOS];
   VB->ClipPtr->size = 4;
   VB->ClipPtr->count = VB->Count;
   VB->ColorPtr[0] = &store->results[VERT_RESULT_COL0];
   VB->ColorPtr[1] = &store->results[VERT_RESULT_BFC0];
   VB->SecondaryColorPtr[0] = &store->results[VERT_RESULT_COL1];
   VB->SecondaryColorPtr[1] = &store->results[VERT_RESULT_BFC1];
   VB->FogCoordPtr = &store->results[VERT_RESULT_FOGC];

   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VERT_RESULT_COL0];
   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VERT_RESULT_COL1];
   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VERT_RESULT_FOGC];
   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VERT_RESULT_PSIZ];

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      VB->TexCoordPtr[i] =
      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
         = &store->results[VERT_RESULT_TEX0 + i];
   }

   for (i = 0; i < ctx->Const.MaxVarying; i++) {
      if (program->Base.OutputsWritten & (1u << (VERT_RESULT_VAR0 + i))) {
         /* Note: varying results get put into the generic attributes */
         VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
            = &store->results[VERT_RESULT_VAR0 + i];
      }
   }

   /* Cliptest and perspective divide.  Clip functions must clear
    * the clipmask.
    */
   store->ormask = 0;
   store->andmask = CLIP_FRUSTUM_BITS;

   if (tnl->NeedNdcCoords) {
      VB->NdcPtr =
         _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
                                            &store->ndcCoords,
                                            store->clipmask,
                                            &store->ormask,
                                            &store->andmask );
   }
   else {
      VB->NdcPtr = NULL;
      _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
                                            NULL,
                                            store->clipmask,
                                            &store->ormask,
                                            &store->andmask );
   }

   if (store->andmask)  /* All vertices are outside the frustum */
      return GL_FALSE;


   /* This is where we'd do clip testing against the user-defined
    * clipping planes, but they're not supported by vertex programs.
    */

   VB->ClipOrMask = store->ormask;
   VB->ClipMask = store->clipmask;

   return GL_TRUE;
}
コード例 #4
0
/**
 * Pipeline stage callback that executes the current vertex program on
 * every vertex of the TNL vertex buffer.
 *
 * Steps, in order:
 *  - refresh tracked matrices (NV programs) or state parameters (ARB
 *    programs / vertex shaders);
 *  - lazily allocate the per-output result vectors on first use;
 *  - for each vertex: load the attributes the program reads, execute the
 *    program, and copy the outputs it writes into the result vectors;
 *  - for NV programs, fill in fog / point size if the program did not
 *    write them;
 *  - point the vertex buffer at the result vectors and hand over to
 *    do_ndc_cliptest().
 *
 * \param ctx    GL context.
 * \param stage  pipeline stage holding the vp_stage_data storage.
 * \return GL_TRUE immediately when no vertex program is current, otherwise
 *         the value returned by do_ndc_cliptest().
 */
static GLboolean
run_vp( struct gl_context *ctx, struct tnl_pipeline_stage *stage )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vp_stage_data *store = VP_STAGE_DATA(stage);
   struct vertex_buffer *VB = &tnl->vb;
   struct gl_vertex_program *program = ctx->VertexProgram._Current;
   struct gl_program_machine *machine = &store->machine;
   GLuint outputs[VERT_RESULT_MAX], numOutputs;
   GLuint i, j;

   if (!program)
      return GL_TRUE;

   if (program->IsNVProgram) {
      _mesa_load_tracked_matrices(ctx);
   }
   else {
      /* ARB program or vertex shader */
      _mesa_load_state_parameters(ctx, program->Base.Parameters);
   }

   /* make list of outputs to save some time below */
   numOutputs = 0;
   for (i = 0; i < VERT_RESULT_MAX; i++) {
      if (program->Base.OutputsWritten & BITFIELD64_BIT(i)) {
         outputs[numOutputs++] = i;
      }
   }

   /* Allocate result vectors.  We delay this until now to avoid allocating
    * memory that would never be used if we don't run the software tnl pipeline.
    * Only results[0].storage is tested; the assert checks that the other
    * vectors are unallocated as well.
    */
   if (!store->results[0].storage) {
      for (i = 0; i < VERT_RESULT_MAX; i++) {
         assert(!store->results[i].storage);
         _mesa_vector4f_alloc( &store->results[i], 0, VB->Size, 32 );
         store->results[i].size = 4;
      }
   }

   /* NOTE(review): presumably makes the program's textures readable while
    * it executes in software; paired with unmap_textures() after the loop.
    * Confirm in map_textures().
    */
   map_textures(ctx, program);

   for (i = 0; i < VB->Count; i++) {
      GLuint attr;

      /* The machine is re-initialized for every vertex. */
      init_machine(ctx, machine, tnl->CurInstance);

#if 0
      printf("Input  %d: %f, %f, %f, %f\n", i,
             VB->AttribPtr[0]->data[i][0],
             VB->AttribPtr[0]->data[i][1],
             VB->AttribPtr[0]->data[i][2],
             VB->AttribPtr[0]->data[i][3]);
      printf("   color: %f, %f, %f, %f\n",
             VB->AttribPtr[3]->data[i][0],
             VB->AttribPtr[3]->data[i][1],
             VB->AttribPtr[3]->data[i][2],
             VB->AttribPtr[3]->data[i][3]);
      printf("  normal: %f, %f, %f, %f\n",
             VB->AttribPtr[2]->data[i][0],
             VB->AttribPtr[2]->data[i][1],
             VB->AttribPtr[2]->data[i][2],
             VB->AttribPtr[2]->data[i][3]);
#endif

      /* the vertex array case: attribute arrays are addressed by byte
       * stride, and only attributes the program reads are loaded.
       */
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
	 if (program->Base.InputsRead & BITFIELD64_BIT(attr)) {
	    const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
	    const GLuint size = VB->AttribPtr[attr]->size;
	    const GLuint stride = VB->AttribPtr[attr]->stride;
	    const GLfloat *data = (GLfloat *) (ptr + stride * i);
#ifdef NAN_CHECK
            check_float(data[0]);
            check_float(data[1]);
            check_float(data[2]);
            check_float(data[3]);
#endif
	    COPY_CLEAN_4V(machine->VertAttribs[attr], size, data);
	 }
      }

      /* execute the program */
      _mesa_execute_program(ctx, &program->Base, machine);

      /* copy the output registers into the VB->attribs arrays */
      for (j = 0; j < numOutputs; j++) {
         const GLuint attr = outputs[j];
#ifdef NAN_CHECK
         check_float(machine->Outputs[attr][0]);
         check_float(machine->Outputs[attr][1]);
         check_float(machine->Outputs[attr][2]);
         check_float(machine->Outputs[attr][3]);
#endif
         COPY_4V(store->results[attr].data[i], machine->Outputs[attr]);
      }

      /* FOGC is a special case.  Fragment shader expects (f,0,0,1) */
      if (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_FOGC)) {
         store->results[VERT_RESULT_FOGC].data[i][1] = 0.0;
         store->results[VERT_RESULT_FOGC].data[i][2] = 0.0;
         store->results[VERT_RESULT_FOGC].data[i][3] = 1.0;
      }
#ifdef NAN_CHECK
      /* Output 0 is the position; a zero W component is flagged here. */
      ASSERT(machine->Outputs[0][3] != 0.0F);
#endif
#if 0
      printf("HPOS: %f %f %f %f\n",
             machine->Outputs[0][0], 
             machine->Outputs[0][1], 
             machine->Outputs[0][2], 
             machine->Outputs[0][3]);
#endif
   }

   unmap_textures(ctx, program);

   /* Fixup fog and point size results if needed: NV programs that do not
    * write these outputs get a constant fog value of 1.0 and the context's
    * current point size.
    */
   if (program->IsNVProgram) {
      if (ctx->Fog.Enabled &&
          (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_FOGC)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_FOGC].data[i][0] = 1.0;
         }
      }

      if (ctx->VertexProgram.PointSizeEnabled &&
          (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_PSIZ)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_PSIZ].data[i][0] = ctx->Point.Size;
         }
      }
   }

   if (program->IsPositionInvariant) {
      /* We need the exact same transform as in the fixed function path here
       * to guarantee invariance, depending on compiler optimization flags
       * results could be different otherwise.
       */
      VB->ClipPtr = TransformRaw( &store->results[0],
				  &ctx->_ModelProjectMatrix,
				  VB->AttribPtr[0] );

      /* Drivers expect this to be clean to element 4...
       * The cases fall through on purpose so that every missing component
       * from the current size up to 4 is cleaned.
       */
      switch (VB->ClipPtr->size) {
      case 1:
	 /* impossible */
      case 2:
	 _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 2 );
	 /* fall-through */
      case 3:
	 _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 3 );
	 /* fall-through */
      case 4:
	 break;
      }
   }
   else {
      /* Setup the VB pointers so that the next pipeline stages get
       * their data from the right place (the program output arrays).
       */
      VB->ClipPtr = &store->results[VERT_RESULT_HPOS];
      VB->ClipPtr->size = 4;
      VB->ClipPtr->count = VB->Count;
   }

   /* Redirect the remaining vertex buffer attributes to the program's
    * result vectors.
    */
   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VERT_RESULT_COL0];
   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VERT_RESULT_COL1];
   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VERT_RESULT_FOGC];
   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VERT_RESULT_PSIZ];
   VB->BackfaceColorPtr = &store->results[VERT_RESULT_BFC0];
   VB->BackfaceSecondaryColorPtr = &store->results[VERT_RESULT_BFC1];

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
         = &store->results[VERT_RESULT_TEX0 + i];
   }

   for (i = 0; i < ctx->Const.MaxVarying; i++) {
      if (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_VAR0 + i)) {
         /* Note: varying results get put into the generic attributes */
	 VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
            = &store->results[VERT_RESULT_VAR0 + i];
      }
   }


   /* Perform NDC and cliptest operations:
    */
   return do_ndc_cliptest(ctx, store);
}
コード例 #5
0
ファイル: gen6_vs_state.c プロジェクト: CPFDSoftware-Tony/gmv
/**
 * Emit the gen6 vertex shader constant and VS state packets into the
 * batch buffer.
 *
 * First a CMD_3D_CONSTANT_VS_STATE packet is written: with no buffer
 * enabled when the program uses a (pull) constant buffer or has no
 * parameters, otherwise pointing at a freshly allocated buffer object that
 * holds every program parameter as four floats.  Then a CMD_3D_VS_STATE
 * packet referencing the VS program buffer is written, with an MI flush
 * emitted before and after it.
 *
 * NOTE(review): BEGIN_BATCH / OUT_BATCH / OUT_RELOC / ADVANCE_BATCH are
 * macros defined elsewhere; they presumably pick up the local `intel`
 * implicitly -- confirm before renaming it.
 */
static void
upload_vs_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
   drm_intel_bo *constant_bo;
   int i;

   if (vp->use_const_buffer || nr_params == 0) {
      /* Disable the push constant buffers. */
      BEGIN_BATCH(5);
      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      if (brw->vertex_program->IsNVProgram)
	 _mesa_load_tracked_matrices(ctx);

      /* Updates the ParameterValues[i] pointers for all parameters of the
       * basic type of PROGRAM_STATE_VAR.
       */
      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);

      /* One vec4 (4 floats) per parameter, copied through a GTT mapping. */
      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
				       nr_params * 4 * sizeof(float),
				       4096);
      drm_intel_gem_bo_map_gtt(constant_bo);
      for (i = 0; i < nr_params; i++) {
	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
		vp->program.Base.Parameters->ParameterValues[i],
		4 * sizeof(float));
      }
      drm_intel_gem_bo_unmap_gtt(constant_bo);

      /* The relocation delta is ALIGN(nr_params, 2) / 2 - 1; presumably
       * this is the buffer length field sharing the dword with the buffer
       * address -- TODO confirm the units against the hardware docs.
       */
      BEGIN_BATCH(5);
      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
		GEN6_CONSTANT_BUFFER_0_ENABLE |
		(5 - 2));
      OUT_RELOC(constant_bo,
		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
		ALIGN(nr_params, 2) / 2 - 1);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      /* Drop the local reference now that the relocation has been
       * emitted; the buffer is not used again in this function.
       */
      drm_intel_bo_unreference(constant_bo);
   }

   intel_batchbuffer_emit_mi_flush(intel->batch);

   /* NOTE(review): the sampler count and the max-threads field are both
    * written as 0 below -- confirm this is intended.
    */
   BEGIN_BATCH(6);
   OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
   OUT_BATCH(0); /* scratch space base offset */
   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
   OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
	     GEN6_VS_STATISTICS_ENABLE);
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel->batch);
}
コード例 #6
0
ファイル: brw_curbe.c プロジェクト: CPFDSoftware-Tony/gmv
/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism, but maybe would benefit from a comparison against
 * the current uploaded set of constants.
 */
static void prepare_constant_buffer(struct brw_context *brw)
{
   GLcontext *ctx = &brw->intel.ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   const struct brw_fragment_program *fp =
      brw_fragment_program_const(brw->fragment_program);
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   GLfloat *buf;
   GLuint i;

   if (sz == 0) {
      if (brw->curbe.last_buf) {
	 free(brw->curbe.last_buf);
	 brw->curbe.last_buf = NULL;
	 brw->curbe.last_bufsz  = 0;
      }
      return;
   }

   buf = (GLfloat *) calloc(1, bufsz);

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      GLuint offset = brw->curbe.wm_start * 16;

      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); 

      /* copy float constants */
      for (i = 0; i < brw->wm.prog_data->nr_params; i++) 
	 buf[offset + i] = *brw->wm.prog_data->param[i];
   }


   /* The clipplanes are actually delivered to both CLIP and VS units.
    * VS uses them to calculate the outcode bitmasks.
    */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
	 buf[offset + i * 4 + 0] = fixed_plane[i][0];
	 buf[offset + i * 4 + 1] = fixed_plane[i][1];
	 buf[offset + i * 4 + 2] = fixed_plane[i][2];
	 buf[offset + i * 4 + 3] = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      assert(MAX_CLIP_PLANES == 6);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
	    buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
	    buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
	    buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
	    buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
	    i++;
	 }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      GLuint offset = brw->curbe.vs_start * 16;
      GLuint nr = brw->vs.prog_data->nr_params / 4;

      if (brw->vertex_program->IsNVProgram)
	 _mesa_load_tracked_matrices(ctx);

      /* Updates the ParamaterValues[i] pointers for all parameters of the
       * basic type of PROGRAM_STATE_VAR.
       */
      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); 

      if (vp->use_const_buffer) {
	 /* Load the subset of push constants that will get used when
	  * we also have a pull constant buffer.
	  */
	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
	    if (brw->vs.constant_map[i] != -1) {
	       assert(brw->vs.constant_map[i] <= nr);
	       memcpy(buf + offset + brw->vs.constant_map[i] * 4,
		      vp->program.Base.Parameters->ParameterValues[i],
		      4 * sizeof(float));
	    }
	 }
      } else {
	 for (i = 0; i < nr; i++) {
	    memcpy(buf + offset + i * 4,
		   vp->program.Base.Parameters->ParameterValues[i],
		   4 * sizeof(float));
	 }
      }
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4) 
	 printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
		buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

      printf("last_buf %p buf %p sz %d/%d cmp %d\n",
	     brw->curbe.last_buf, buf,
	     bufsz, brw->curbe.last_bufsz,
	     brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.curbe_bo != NULL &&
       brw->curbe.last_buf &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* constants have not changed */
      free(buf);
   } 
   else {
      /* constants have changed */
      if (brw->curbe.last_buf)
	 free(brw->curbe.last_buf);

      brw->curbe.last_buf = buf;
      brw->curbe.last_bufsz = bufsz;

      if (brw->curbe.curbe_bo != NULL &&
	  brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
      {
	 drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
	 dri_bo_unreference(brw->curbe.curbe_bo);
	 brw->curbe.curbe_bo = NULL;
      }

      if (brw->curbe.curbe_bo == NULL) {
	 /* Allocate a single page for CURBE entries for this batchbuffer.
	  * They're generally around 64b.
	  */
	 brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
					    4096, 1 << 6);
	 brw->curbe.curbe_next_offset = 0;
	 drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
      }

      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
      brw->curbe.curbe_next_offset += bufsz;
      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);

      /* Copy data to the buffer:
       */
      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
	     buf,
	     bufsz);
   }

   brw_add_validated_bo(brw, brw->curbe.curbe_bo);

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */
}