예제 #1
0
/**
 * This function executes vertex programs
 */
static GLboolean
run_vp( struct gl_context *ctx, struct tnl_pipeline_stage *stage )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vp_stage_data *store = VP_STAGE_DATA(stage);
   struct vertex_buffer *VB = &tnl->vb;
   struct gl_program *program = ctx->VertexProgram._Current;
   struct gl_program_machine *machine = &store->machine;
   GLuint outputs[VARYING_SLOT_MAX], numOutputs;
   GLuint i, j;

   if (!program)
      return GL_TRUE;

   /* ARB program or vertex shader */
   _mesa_load_state_parameters(ctx, program->Parameters);

   /* make list of outputs to save some time below */
   numOutputs = 0;
   for (i = 0; i < VARYING_SLOT_MAX; i++) {
      if (program->info.outputs_written & BITFIELD64_BIT(i)) {
         outputs[numOutputs++] = i;
      }
   }

   /* Allocate result vectors.  We delay this until now to avoid allocating
    * memory that would never be used if we don't run the software tnl pipeline.
    */
   if (!store->results[0].storage) {
      for (i = 0; i < VARYING_SLOT_MAX; i++) {
         assert(!store->results[i].storage);
         _mesa_vector4f_alloc( &store->results[i], 0, VB->Size, 32 );
         store->results[i].size = 4;
      }
   }

   map_textures(ctx, program);

   for (i = 0; i < VB->Count; i++) {
      GLuint attr;

      init_machine(ctx, machine, tnl->CurInstance);

#if 0
      printf("Input  %d: %f, %f, %f, %f\n", i,
             VB->AttribPtr[0]->data[i][0],
             VB->AttribPtr[0]->data[i][1],
             VB->AttribPtr[0]->data[i][2],
             VB->AttribPtr[0]->data[i][3]);
      printf("   color: %f, %f, %f, %f\n",
             VB->AttribPtr[3]->data[i][0],
             VB->AttribPtr[3]->data[i][1],
             VB->AttribPtr[3]->data[i][2],
             VB->AttribPtr[3]->data[i][3]);
      printf("  normal: %f, %f, %f, %f\n",
             VB->AttribPtr[2]->data[i][0],
             VB->AttribPtr[2]->data[i][1],
             VB->AttribPtr[2]->data[i][2],
             VB->AttribPtr[2]->data[i][3]);
#endif

      /* the vertex array case */
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
	 if (program->info.inputs_read & BITFIELD64_BIT(attr)) {
	    const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
	    const GLuint size = VB->AttribPtr[attr]->size;
	    const GLuint stride = VB->AttribPtr[attr]->stride;
	    const GLfloat *data = (GLfloat *) (ptr + stride * i);
#ifdef NAN_CHECK
            check_float(data[0]);
            check_float(data[1]);
            check_float(data[2]);
            check_float(data[3]);
#endif
	    COPY_CLEAN_4V(machine->VertAttribs[attr], size, data);
	 }
      }

      /* execute the program */
      _mesa_execute_program(ctx, program, machine);

      /* copy the output registers into the VB->attribs arrays */
      for (j = 0; j < numOutputs; j++) {
         const GLuint attr = outputs[j];
#ifdef NAN_CHECK
         check_float(machine->Outputs[attr][0]);
         check_float(machine->Outputs[attr][1]);
         check_float(machine->Outputs[attr][2]);
         check_float(machine->Outputs[attr][3]);
#endif
         COPY_4V(store->results[attr].data[i], machine->Outputs[attr]);
      }

      /* FOGC is a special case.  Fragment shader expects (f,0,0,1) */
      if (program->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_FOGC)) {
         store->results[VARYING_SLOT_FOGC].data[i][1] = 0.0;
         store->results[VARYING_SLOT_FOGC].data[i][2] = 0.0;
         store->results[VARYING_SLOT_FOGC].data[i][3] = 1.0;
      }
#ifdef NAN_CHECK
      assert(machine->Outputs[0][3] != 0.0F);
#endif
#if 0
      printf("HPOS: %f %f %f %f\n",
             machine->Outputs[0][0], 
             machine->Outputs[0][1], 
             machine->Outputs[0][2], 
             machine->Outputs[0][3]);
#endif
   }

   unmap_textures(ctx, program);

   if (program->IsPositionInvariant) {
      /* We need the exact same transform as in the fixed function path here
       * to guarantee invariance, depending on compiler optimization flags
       * results could be different otherwise.
       */
      VB->ClipPtr = TransformRaw( &store->results[0],
				  &ctx->_ModelProjectMatrix,
				  VB->AttribPtr[0] );

      /* Drivers expect this to be clean to element 4...
       */
      switch (VB->ClipPtr->size) {
      case 1:
	 /* impossible */
      case 2:
	 _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 2 );
	 /* fall-through */
      case 3:
	 _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 3 );
	 /* fall-through */
      case 4:
	 break;
      }
   }
   else {
      /* Setup the VB pointers so that the next pipeline stages get
       * their data from the right place (the program output arrays).
       */
      VB->ClipPtr = &store->results[VARYING_SLOT_POS];
      VB->ClipPtr->size = 4;
      VB->ClipPtr->count = VB->Count;
   }

   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VARYING_SLOT_COL0];
   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VARYING_SLOT_COL1];
   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VARYING_SLOT_FOGC];
   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VARYING_SLOT_PSIZ];
   VB->BackfaceColorPtr = &store->results[VARYING_SLOT_BFC0];
   VB->BackfaceSecondaryColorPtr = &store->results[VARYING_SLOT_BFC1];

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
         = &store->results[VARYING_SLOT_TEX0 + i];
   }

   for (i = 0; i < ctx->Const.MaxVarying; i++) {
      if (program->info.outputs_written &
          BITFIELD64_BIT(VARYING_SLOT_VAR0 + i)) {
         /* Note: varying results get put into the generic attributes */
	 VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
            = &store->results[VARYING_SLOT_VAR0 + i];
      }
   }


   /* Perform NDC and cliptest operations:
    */
   return do_ndc_cliptest(ctx, store);
}
예제 #2
0
파일: vbo_exec_api.c 프로젝트: nikai3d/mesa
/**
 * Flush existing data, set the new size of attribute \p attr and re-emit
 * the vertices carried over by the buffer wrap in the new vertex layout.
 *
 * This is called when an attribute goes from a smaller size to a larger
 * one, e.g. glTexCoord2f -> glTexCoord4f: the previously emitted
 * 2-component texcoords must be revisited and padded with zero and one.
 *
 * \param exec     immediate-mode execution context
 * \param attr     index of the attribute whose size changes
 * \param newSize  new size of the attribute
 */
static void
vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec,
                             GLuint attr, GLuint newSize )
{
   struct gl_context *ctx = exec->ctx;
   struct vbo_context *vbo = vbo_context(ctx);
   const GLint vertsBeforeWrap = exec->vtx.vert_count;
   const GLuint prevVertexSize = exec->vtx.vertex_size; /* floats per vertex */
   const GLuint prevAttrSize = exec->vtx.attrsz[attr];
   GLfloat *prevAttrPtr[VBO_ATTRIB_MAX];
   GLuint a;

   /* Run pipeline on current vertices, copy wrapped vertices
    * to exec->vtx.copied.
    */
   vbo_exec_wrap_buffers( exec );

   /* In the middle of a primitive: remember the old vertex layout so the
    * copied vertices can be translated to the new one below.
    */
   if (unlikely(exec->vtx.copied.nr)) {
      memcpy(prevAttrPtr, exec->vtx.attrptr, sizeof(prevAttrPtr));
   }

   /* The attribute already exists in the vertex and is growing: save the
    * current values first so that back-copying works.
    */
   if (unlikely(prevAttrSize)) {
      vbo_exec_copy_to_current( exec );
   }

   /* Heuristic: Attempt to isolate attributes received outside
    * begin/end so that they don't bloat the vertices.
    */
   if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
       !prevAttrSize && vertsBeforeWrap > 8 && exec->vtx.vertex_size) {
      vbo_exec_copy_to_current( exec );
      reset_attrfv( exec );
   }

   /* Fix up sizes and restart at the beginning of the mapped buffer. */
   exec->vtx.attrsz[attr] = newSize;
   exec->vtx.vertex_size += newSize - prevAttrSize;
   exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
                         (exec->vtx.vertex_size * sizeof(GLfloat)));
   exec->vtx.vert_count = 0;
   exec->vtx.buffer_ptr = exec->vtx.buffer_map;

   if (unlikely(prevAttrSize)) {
      /* Size changed, so every attrptr[] entry has to be laid out again. */
      GLfloat *cursor = exec->vtx.vertex;

      for (a = 0; a < VBO_ATTRIB_MAX; a++) {
         const GLuint sz = exec->vtx.attrsz[a];

         if (!sz) {
            exec->vtx.attrptr[a] = NULL; /* will not be dereferenced */
            continue;
         }
         exec->vtx.attrptr[a] = cursor;
         cursor += sz;
      }

      /* Copy from current to repopulate the vertex with correct values. */
      vbo_exec_copy_from_current( exec );
   }
   else {
      /* Brand new attribute: it simply goes at the end of the vertex. */
      exec->vtx.attrptr[attr] = exec->vtx.vertex +
         exec->vtx.vertex_size - newSize;
   }

   /* Translate the stored vertices to the new format.  No replay is
    * needed; each attribute is copied piecewise.
    */
   if (unlikely(exec->vtx.copied.nr)) {
      const GLfloat *src = exec->vtx.copied.buffer;
      GLfloat *dst = exec->vtx.buffer_ptr;
      GLuint v;

      assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map);

      for (v = 0; v < exec->vtx.copied.nr; v++) {
         for (a = 0; a < VBO_ATTRIB_MAX; a++) {
            const GLuint sz = exec->vtx.attrsz[a];
            GLint oldOffset, newOffset;

            if (!sz)
               continue;

            oldOffset = prevAttrPtr[a] - exec->vtx.vertex;
            newOffset = exec->vtx.attrptr[a] - exec->vtx.vertex;

            if (a != attr) {
               /* Untouched attribute: same size, possibly new offset. */
               COPY_SZ_4V(dst + newOffset, sz, src + oldOffset);
            }
            else if (prevAttrSize) {
               /* Grown attribute: pad the old value, then store newSize
                * components of it.
                */
               GLfloat padded[4];
               COPY_CLEAN_4V(padded, prevAttrSize, src + oldOffset);
               COPY_SZ_4V(dst + newOffset, newSize, padded);
            }
            else {
               /* New attribute: take the value from vbo->currval[]. */
               GLfloat *current = (GLfloat *)vbo->currval[a].Ptr;
               COPY_SZ_4V(dst + newOffset, sz, current);
            }
         }

         src += prevVertexSize;
         dst += exec->vtx.vertex_size;
      }

      exec->vtx.buffer_ptr = dst;
      exec->vtx.vert_count += exec->vtx.copied.nr;
      exec->vtx.copied.nr = 0;
   }
}
예제 #3
0
/**
 * Flush existing data, set new attrib size, replay copied vertices.
 *
 * \param exec   immediate-mode execution context
 * \param attr   index of the attribute whose size changes
 * \param newsz  new size of the attribute
 */
static void vbo_exec_wrap_upgrade_vertex( struct vbo_exec_context *exec,
                                          GLuint attr,
                                          GLuint newsz )
{
   GLcontext *ctx = exec->ctx;
   struct vbo_context *vbo = vbo_context(ctx);
   GLint lastcount = exec->vtx.vert_count;
   GLfloat *tmp;
   GLuint oldsz;
   GLuint i;

   /* Run pipeline on current vertices, copy wrapped vertices
    * to exec->vtx.copied.
    */
   vbo_exec_wrap_buffers( exec );

   /* Do a COPY_TO_CURRENT to ensure back-copying works for the case
    * when the attribute already exists in the vertex and is having
    * its size increased.
    */
   vbo_exec_copy_to_current( exec );

   /* Heuristic: Attempt to isolate attributes received outside
    * begin/end so that they don't bloat the vertices.
    */
   if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
       exec->vtx.attrsz[attr] == 0 &&
       lastcount > 8 &&
       exec->vtx.vertex_size) {
      reset_attrfv( exec );
   }

   /* Fix up sizes.  oldsz is read after the heuristic above on purpose. */
   oldsz = exec->vtx.attrsz[attr];
   exec->vtx.attrsz[attr] = newsz;

   exec->vtx.vertex_size += newsz - oldsz;
   exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
                         (exec->vtx.vertex_size * sizeof(GLfloat)));
   exec->vtx.vert_count = 0;
   exec->vtx.buffer_ptr = exec->vtx.buffer_map;

   /* Recalculate all the attrptr[] values
    */
   for (i = 0, tmp = exec->vtx.vertex ; i < VBO_ATTRIB_MAX ; i++) {
      if (exec->vtx.attrsz[i]) {
         exec->vtx.attrptr[i] = tmp;
         tmp += exec->vtx.attrsz[i];
      }
      else {
         exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
      }
   }

   /* Copy from current to repopulate the vertex with correct values.
    */
   vbo_exec_copy_from_current( exec );

   /* Replay stored vertices to translate them
    * to new format here.
    *
    * -- No need to replay - just copy piecewise
    */
   if (exec->vtx.copied.nr) {
      GLfloat *data = exec->vtx.copied.buffer;
      GLfloat *dest = exec->vtx.buffer_ptr;
      GLuint j;

      assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map);

      for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
         for (j = 0 ; j < VBO_ATTRIB_MAX ; j++) {
            if (exec->vtx.attrsz[j]) {
               if (j == attr) {
                  if (oldsz) {
                     /* Pad the old value in a scratch vector and store
                      * only newsz components.  Expanding straight into
                      * dest would always write four floats, i.e. past the
                      * end of the attribute whenever newsz < 4.
                      */
                     GLfloat padded[4];
                     COPY_CLEAN_4V( padded, oldsz, data );
                     COPY_SZ_4V( dest, newsz, padded );
                     data += oldsz;
                     dest += newsz;
                  } else {
                     /* New attribute: no old data to consume, take the
                      * value from vbo->currval[].
                      */
                     const GLfloat *current = (const GLfloat *)vbo->currval[j].Ptr;
                     COPY_SZ_4V( dest, newsz, current );
                     dest += newsz;
                  }
               }
               else {
                  GLuint sz = exec->vtx.attrsz[j];
                  COPY_SZ_4V( dest, sz, data );
                  dest += sz;
                  data += sz;
               }
            }
         }
      }

      exec->vtx.buffer_ptr = dest;
      exec->vtx.vert_count += exec->vtx.copied.nr;
      exec->vtx.copied.nr = 0;
   }
}
예제 #4
0
/**
 * Pipeline stage callback that runs the current vertex program over every
 * vertex in the TNL vertex buffer, then performs the clip test.
 *
 * The values written by the program are collected in store->results[] and
 * the vertex buffer pointers are redirected to those vectors.
 *
 * \param ctx    GL context
 * \param stage  pipeline stage carrying the vp_stage_data
 * \return GL_TRUE when no vertex program is current or when the clip test
 *         leaves store->andmask at zero; GL_FALSE when store->andmask is
 *         non-zero (all vertices are outside the frustum).
 */
static GLboolean
run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vp_stage_data *store = VP_STAGE_DATA(stage);
   struct vertex_buffer *VB = &tnl->vb;
   struct gl_vertex_program *program = ctx->VertexProgram._Current;
   struct gl_program_machine machine;
   GLuint outputs[VERT_RESULT_MAX], numOutputs;
   GLuint i, j;

   if (!program)
      return GL_TRUE;

   if (program->IsNVProgram) {
      _mesa_load_tracked_matrices(ctx);
   }
   else {
      /* ARB program or vertex shader */
      _mesa_load_state_parameters(ctx, program->Base.Parameters);
   }

   /* Make a list of the written outputs to save some time below.
    * The bit tests in this function use an unsigned 1 so that the shift
    * stays well-defined if the index reaches bit 31.
    */
   numOutputs = 0;
   for (i = 0; i < VERT_RESULT_MAX; i++) {
      if (program->Base.OutputsWritten & (1u << i)) {
         outputs[numOutputs++] = i;
      }
   }

   for (i = 0; i < VB->Count; i++) {
      GLuint attr;

      init_machine(ctx, &machine);

      /* the vertex array case */
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
         if (program->Base.InputsRead & (1u << attr)) {
            const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
            const GLuint size = VB->AttribPtr[attr]->size;
            const GLuint stride = VB->AttribPtr[attr]->stride;
            const GLfloat *data = (const GLfloat *) (ptr + stride * i);
            COPY_CLEAN_4V(machine.VertAttribs[attr], size, data);
         }
      }

      /* execute the program */
      _mesa_execute_program(ctx, &program->Base, &machine);

      /* copy the output registers into the VB->attribs arrays */
      for (j = 0; j < numOutputs; j++) {
         const GLuint attr = outputs[j];
         COPY_4V(store->results[attr].data[i], machine.Outputs[attr]);
      }
   }

   /* Fixup fog and point size results if needed: NV programs that do not
    * write them get a constant value for every vertex.
    */
   if (program->IsNVProgram) {
      if (ctx->Fog.Enabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_FOGC)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_FOGC].data[i][0] = 1.0;
         }
      }

      if (ctx->VertexProgram.PointSizeEnabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_PSIZ)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_PSIZ].data[i][0] = ctx->Point.Size;
         }
      }
   }

   /* Setup the VB pointers so that the next pipeline stages get
    * their data from the right place (the program output arrays).
    */
   VB->ClipPtr = &store->results[VERT_RESULT_HPOS];
   VB->ClipPtr->size = 4;
   VB->ClipPtr->count = VB->Count;
   VB->ColorPtr[0] = &store->results[VERT_RESULT_COL0];
   VB->ColorPtr[1] = &store->results[VERT_RESULT_BFC0];
   VB->SecondaryColorPtr[0] = &store->results[VERT_RESULT_COL1];
   VB->SecondaryColorPtr[1] = &store->results[VERT_RESULT_BFC1];
   VB->FogCoordPtr = &store->results[VERT_RESULT_FOGC];

   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VERT_RESULT_COL0];
   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VERT_RESULT_COL1];
   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VERT_RESULT_FOGC];
   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VERT_RESULT_PSIZ];

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      VB->TexCoordPtr[i] =
      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
         = &store->results[VERT_RESULT_TEX0 + i];
   }

   for (i = 0; i < ctx->Const.MaxVarying; i++) {
      if (program->Base.OutputsWritten & (1u << (VERT_RESULT_VAR0 + i))) {
         /* Note: varying results get put into the generic attributes */
         VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
            = &store->results[VERT_RESULT_VAR0 + i];
      }
   }

   /* Cliptest and perspective divide.  Clip functions must clear
    * the clipmask.
    */
   store->ormask = 0;
   store->andmask = CLIP_FRUSTUM_BITS;

   if (tnl->NeedNdcCoords) {
      VB->NdcPtr =
         _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
                                            &store->ndcCoords,
                                            store->clipmask,
                                            &store->ormask,
                                            &store->andmask );
   }
   else {
      VB->NdcPtr = NULL;
      _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
                                            NULL,
                                            store->clipmask,
                                            &store->ormask,
                                            &store->andmask );
   }

   if (store->andmask)  /* All vertices are outside the frustum */
      return GL_FALSE;

   /* This is where we'd do clip testing against the user-defined
    * clipping planes, but they're not supported by vertex programs.
    */

   VB->ClipOrMask = store->ormask;
   VB->ClipMask = store->clipmask;

   return GL_TRUE;
}
예제 #5
0
/**
 * Flush existing data, set new attrib size, replay copied vertices.
 *
 * \param ctx    GL context; the display-list save state is taken from
 *               vbo_context(ctx)->save
 * \param attr   index of the attribute whose size changes
 * \param newsz  new size of the attribute
 */
static void
_save_upgrade_vertex(struct gl_context *ctx, GLuint attr, GLuint newsz)
{
   struct vbo_save_context *save = &vbo_context(ctx)->save;
   GLuint oldsz;
   GLuint i;
   GLfloat *tmp;

   /* Store the current run of vertices, and emit a GL_END.  Emit a
    * BEGIN in the new buffer.
    */
   if (save->vert_count)
      _save_wrap_buffers(ctx);
   else
      assert(save->copied.nr == 0);

   /* Do a COPY_TO_CURRENT to ensure back-copying works for the case
    * when the attribute already exists in the vertex and is having
    * its size increased.
    */
   _save_copy_to_current(ctx);

   /* Fix up sizes:
    */
   oldsz = save->attrsz[attr];
   save->attrsz[attr] = newsz;

   save->vertex_size += newsz - oldsz;
   save->max_vert = ((VBO_SAVE_BUFFER_SIZE - save->vertex_store->used) /
                     save->vertex_size);
   save->vert_count = 0;

   /* Recalculate all the attrptr[] values:
    */
   for (i = 0, tmp = save->vertex; i < VBO_ATTRIB_MAX; i++) {
      if (save->attrsz[i]) {
         save->attrptr[i] = tmp;
         tmp += save->attrsz[i];
      }
      else {
         save->attrptr[i] = NULL;       /* will not be dereferenced. */
      }
   }

   /* Copy from current to repopulate the vertex with correct values.
    */
   _save_copy_from_current(ctx);

   /* Replay stored vertices to translate them to new format here.
    *
    * If there are copied vertices and the new (upgraded) attribute
    * has not been defined before, this list is somewhat degenerate,
    * and will need fixup at runtime.
    */
   if (save->copied.nr) {
      GLfloat *data = save->copied.buffer;
      GLfloat *dest = save->buffer;
      GLuint j;

      /* Need to note this and fix up at runtime (or loopback):
       */
      if (attr != VBO_ATTRIB_POS && save->currentsz[attr][0] == 0) {
         assert(oldsz == 0);
         save->dangling_attr_ref = GL_TRUE;
      }

      for (i = 0; i < save->copied.nr; i++) {
         for (j = 0; j < VBO_ATTRIB_MAX; j++) {
            if (save->attrsz[j]) {
               if (j == attr) {
                  if (oldsz) {
                     /* Pad the old value in a scratch vector and store
                      * only newsz components.  Expanding straight into
                      * dest would always write four floats, i.e. past the
                      * end of the attribute whenever newsz < 4.
                      */
                     GLfloat padded[4];
                     COPY_CLEAN_4V(padded, oldsz, data);
                     COPY_SZ_4V(dest, newsz, padded);
                     data += oldsz;
                     dest += newsz;
                  }
                  else {
                     /* New attribute: no old data to consume, take the
                      * value from save->current[].
                      */
                     COPY_SZ_4V(dest, newsz, save->current[attr]);
                     dest += newsz;
                  }
               }
               else {
                  GLint sz = save->attrsz[j];
                  COPY_SZ_4V(dest, sz, data);
                  data += sz;
                  dest += sz;
               }
            }
         }
      }

      save->buffer_ptr = dest;
      save->vert_count += save->copied.nr;
   }
}
/**
 * Pipeline stage callback that runs the current vertex program over every
 * vertex in the TNL vertex buffer.
 *
 * The values written by the program are collected in store->results[] and
 * the vertex buffer pointers are redirected to those vectors so the
 * following stages read the program outputs.
 *
 * \param ctx    GL context
 * \param stage  pipeline stage carrying the vp_stage_data
 * \return GL_TRUE immediately when no vertex program is current, otherwise
 *         the result of do_ndc_cliptest().
 */
static GLboolean
run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vp_stage_data *store = VP_STAGE_DATA(stage);
   struct vertex_buffer *VB = &tnl->vb;
   struct gl_vertex_program *program = ctx->VertexProgram._Current;
   struct gl_program_machine machine;
   GLuint outputs[VERT_RESULT_MAX], numOutputs;
   GLuint i, j;

   if (!program)
      return GL_TRUE;

   if (program->IsNVProgram) {
      _mesa_load_tracked_matrices(ctx);
   }
   else {
      /* ARB program or vertex shader */
      _mesa_load_state_parameters(ctx, program->Base.Parameters);
   }

   /* Make a list of the written outputs to save some time below.
    * The bit tests in this function use an unsigned 1 so that the shift
    * stays well-defined if the index reaches bit 31.
    */
   numOutputs = 0;
   for (i = 0; i < VERT_RESULT_MAX; i++) {
      if (program->Base.OutputsWritten & (1u << i)) {
         outputs[numOutputs++] = i;
      }
   }

   map_textures(ctx, program);

   for (i = 0; i < VB->Count; i++) {
      GLuint attr;

      init_machine(ctx, &machine);

      /* the vertex array case */
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
         if (program->Base.InputsRead & (1u << attr)) {
            const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
            const GLuint size = VB->AttribPtr[attr]->size;
            const GLuint stride = VB->AttribPtr[attr]->stride;
            const GLfloat *data = (const GLfloat *) (ptr + stride * i);
            COPY_CLEAN_4V(machine.VertAttribs[attr], size, data);
         }
      }

      /* execute the program */
      _mesa_execute_program(ctx, &program->Base, &machine);

      /* copy the output registers into the VB->attribs arrays */
      for (j = 0; j < numOutputs; j++) {
         const GLuint attr = outputs[j];
         COPY_4V(store->results[attr].data[i], machine.Outputs[attr]);
      }
   }

   unmap_textures(ctx, program);

   /* Fixup fog and point size results if needed: NV programs that do not
    * write them get a constant value for every vertex.
    */
   if (program->IsNVProgram) {
      if (ctx->Fog.Enabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_FOGC)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_FOGC].data[i][0] = 1.0;
         }
      }

      if (ctx->VertexProgram.PointSizeEnabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_PSIZ)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_PSIZ].data[i][0] = ctx->Point.Size;
         }
      }
   }

   if (program->IsPositionInvariant) {
      /* We need the exact same transform as in the fixed function path here
       * to guarantee invariance, depending on compiler optimization flags
       * results could be different otherwise.
       */
      VB->ClipPtr = TransformRaw( &store->results[0],
                                  &ctx->_ModelProjectMatrix,
                                  VB->AttribPtr[0] );

      /* Drivers expect this to be clean to element 4...
       */
      switch (VB->ClipPtr->size) {
      case 1:
         /* impossible */
      case 2:
         _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 2 );
         /* fall-through */
      case 3:
         _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 3 );
         /* fall-through */
      case 4:
         break;
      }
   }
   else {
      /* Setup the VB pointers so that the next pipeline stages get
       * their data from the right place (the program output arrays).
       */
      VB->ClipPtr = &store->results[VERT_RESULT_HPOS];
      VB->ClipPtr->size = 4;
      VB->ClipPtr->count = VB->Count;
   }

   VB->ColorPtr[0] = &store->results[VERT_RESULT_COL0];
   VB->ColorPtr[1] = &store->results[VERT_RESULT_BFC0];
   VB->SecondaryColorPtr[0] = &store->results[VERT_RESULT_COL1];
   VB->SecondaryColorPtr[1] = &store->results[VERT_RESULT_BFC1];
   VB->FogCoordPtr = &store->results[VERT_RESULT_FOGC];

   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VERT_RESULT_COL0];
   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VERT_RESULT_COL1];
   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VERT_RESULT_FOGC];
   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VERT_RESULT_PSIZ];

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      VB->TexCoordPtr[i] =
      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
         = &store->results[VERT_RESULT_TEX0 + i];
   }

   for (i = 0; i < ctx->Const.MaxVarying; i++) {
      if (program->Base.OutputsWritten & (1u << (VERT_RESULT_VAR0 + i))) {
         /* Note: varying results get put into the generic attributes */
         VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
            = &store->results[VERT_RESULT_VAR0 + i];
      }
   }

   /* Perform NDC and cliptest operations:
    */
   return do_ndc_cliptest(ctx, store);
}