Example #1
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, i, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                           ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0) /* flip to 1 for a debug dump of the rebased index range */
      fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);
	 int k;

	 /* If we have a VB set to be uploaded for this buffer object
	  * already, reuse that VB state so that we emit fewer
	  * relocations.
	  */
	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
	    buffer->offset = (uintptr_t)glarray->Ptr;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;

            uint32_t offset, size;
            if (glarray->InstanceDivisor) {
               offset = buffer->offset;
               size = (buffer->stride * ((brw->num_instances /
                                          glarray->InstanceDivisor) - 1) +
                       glarray->_ElementSize);
            } else {
               if (min_index == -1) { /* ~0u: index bounds unknown, reference the whole buffer */
                  offset = 0;
                  size = intel_buffer->Base.Size;
               } else {
                  offset = buffer->offset + min_index * buffer->stride;
                  size = (buffer->stride * (max_index - min_index) +
                          glarray->_ElementSize);
               }
            }
            buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
                                                offset, size);
            drm_intel_bo_reference(buffer->bo);

	    input->buffer = j++;
	    input->offset = 0;
	 }

	 /* This is a common place to reach if the user mistakenly supplies
	  * a pointer in place of a VBO offset.  If we just let it go through,
	  * we may end up dereferencing a pointer beyond the bounds of the
	  * GTT.  We would hope that the VBO's max_index would save us, but
	  * Mesa appears to hand us min/max values not clipped to the
	  * array object's _MaxElement, and _MaxElement frequently appears
	  * to be wrong anyway.
	  *
	  * The VBO spec allows application termination in this case, and it's
	  * probably a service to the poor programmer to do so rather than
	  * trying to just not render.
	  */
	 assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
	 {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
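A note on the interleave test in the `else` branch above: an upload set stays "interleaved" only while every client array shares the first attribute's stride and its element fits inside that stride's window. A minimal standalone sketch of the same checks, using a hypothetical `struct attr` in place of the driver's `gl_client_array`:

#include <stddef.h>
#include <stdint.h>

struct attr {
   const unsigned char *ptr;  /* client-memory start of the array */
   unsigned stride;           /* byte stride between elements */
   unsigned element_size;     /* size of one element in bytes */
};

/* Returns the common stride if the attributes can be uploaded as one
 * interleaved block, or 0 if the optimization must be disabled.
 */
static unsigned
detect_interleave(const struct attr *attrs, unsigned count)
{
   const unsigned char *base = NULL;
   unsigned interleaved = 0;

   for (unsigned i = 0; i < count; i++) {
      if (i == 0) {
         interleaved = attrs[0].stride;
         base = attrs[0].ptr;
      } else if (interleaved != attrs[i].stride ||
                 attrs[i].ptr < base ||
                 (uintptr_t)(attrs[i].ptr - base) + attrs[i].element_size >
                    interleaved) {
         return 0;  /* stride mismatch, or element outside the first stride */
      }
   }
   return interleaved;
}

As in the driver, an attribute that starts before the first attribute's pointer, or extends past the end of its stride, forces a fall-back to per-array uploads.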
Example #2
static void brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0, total_size = 0;
   unsigned int min_index = brw->vb.min_index;
   unsigned int max_index = brw->vb.max_index;
   int delta, i, j;
   GLboolean can_merge_uploads = GL_TRUE;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read
    */
   if (0)
      printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      if (input->glarray->Size && get_size(input->glarray->Type))
         brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      goto prepare;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;
      int type_size = get_size(glarray->Type);

      input->element_size = type_size * glarray->Size;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);
	 int k;

	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
            buffer->bo = intel_bufferobj_source(intel,
                                                intel_buffer, type_size,
						&buffer->offset);
	    drm_intel_bo_reference(buffer->bo);
	    buffer->offset += (uintptr_t)glarray->Ptr;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;

	    input->buffer = j++;
	    input->offset = 0;
	 }

	 /* This is a common place to reach if the user mistakenly supplies
	  * a pointer in place of a VBO offset.  If we just let it go through,
	  * we may end up dereferencing a pointer beyond the bounds of the
	  * GTT.  We would hope that the VBO's max_index would save us, but
	  * Mesa appears to hand us min/max values not clipped to the
	  * array object's _MaxElement, and _MaxElement frequently appears
	  * to be wrong anyway.
	  *
	  * The VBO spec allows application termination in this case, and it's
	  * probably a service to the poor programmer to do so rather than
	  * trying to just not render.
	  */
	 assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    /* Position array not properly enabled:
	     */
	    if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) {
               intel->Fallback = true; /* boolean, not bitfield */
               return;
            }

	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
		  (uintptr_t)(glarray->Ptr - ptr) > interleaved)
	 {
	    interleaved = 0;
	 }
	 else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size - 1))
	 {
	    /* enforce natural alignment (for doubles) */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;

	 total_size = ALIGN(total_size, type_size);
	 total_size += input->element_size;

         if (glarray->InstanceDivisor != 0) {
            can_merge_uploads = GL_FALSE;
         }
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }
   if (delta && !brw->intel.intelScreen->relaxed_relocations)
      min_index = delta = 0;

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved && interleaved <= 2*total_size) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
      else if ((total_size < 2048) && can_merge_uploads) {
	 /* Upload non-interleaved arrays into a single interleaved array */
	 struct brw_vertex_buffer *buffer;
	 int count = MAX2(max_index - min_index + 1, 1);
	 int offset;
	 char *map;

	 map = intel_upload_map(&brw->intel, total_size * count, total_size);
	 for (i = offset = 0; i < nr_uploads; i++) {
	    const unsigned char *src = upload[i]->glarray->Ptr;
	    int size = upload[i]->element_size;
	    int stride = upload[i]->glarray->StrideB;
	    char *dst;
	    int n;

	    offset = ALIGN(offset, get_size(upload[i]->glarray->Type));
	    dst = map + offset;
	    src += min_index * stride;

	    for (n = 0; n < count; n++) {
	       memcpy(dst, src, size);
	       src += stride;
	       dst += total_size;
	    }

	    upload[i]->offset = offset;
	    upload[i]->buffer = j;

	    offset += size;
	 }
	 assert(offset == total_size);
	 buffer = &brw->vb.buffers[j++];
	 intel_upload_unmap(&brw->intel, map, offset * count, offset,
			    &buffer->bo, &buffer->offset);
	 buffer->stride = offset;
	 buffer->step_rate = 0;
	 buffer->offset -= delta * offset;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->element_size);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->element_size);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   /* can we simply extend the current vb? */
   if (j == brw->vb.nr_current_buffers) {
      int delta = 0;
      for (i = 0; i < j; i++) {
	 int d;

	 if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
	     brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
	     brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
	    break;

	 d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
	 if (d < 0)
	    break;
	 if (i == 0)
	    delta = d / brw->vb.current_buffers[i].stride;
	 if (delta * brw->vb.current_buffers[i].stride != d)
	    break;
      }

      if (i == j) {
	 brw->vb.start_vertex_bias += delta;
	 while (--j >= 0)
	    drm_intel_bo_unreference(brw->vb.buffers[j].bo);
	 j = 0;
      }
   }

   brw->vb.nr_buffers = j;

prepare:
   brw_prepare_query_begin(brw);
}
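Both versions above share the same rebasing arithmetic: when every enabled array is uploaded, only [min_index, max_index] is copied and the 3DPRIMITIVE is biased back to element zero; otherwise each uploaded buffer's offset is walked back by `delta` elements. A compact sketch with hypothetical names (not driver code):

struct vb_state {
   int start_vertex_bias;  /* bias applied to every vertex index in the draw */
};

/* Returns the element delta that uploaded buffers must compensate for;
 * callers then do buffer->offset -= delta * stride, as above.
 */
static int
compute_delta(struct vb_state *vb, unsigned min_index,
              unsigned nr_uploads, unsigned nr_enabled)
{
   int delta = (int)min_index;

   vb->start_vertex_bias = 0;
   if (nr_uploads == nr_enabled) {
      /* All arrays are uploads trimmed to start at min_index, so rebase
       * the draw itself instead of padding the buffers.
       */
      vb->start_vertex_bias = -delta;
      delta = 0;
   }
   return delta;
}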
Example #3
GLboolean brw_upload_vertices( struct brw_context *brw,
			       GLuint min_index,
			       GLuint max_index )
{
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   GLuint tmp = brw->vs.prog_data->inputs_read; 
   struct brw_vertex_element_packet vep;
   struct brw_array_state vbp;
   GLuint i;
   const void *ptr = NULL;
   GLuint interleave = 0;

   struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
   GLuint nr_enabled = 0;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;
   

   memset(&vbp, 0, sizeof(vbp));
   memset(&vep, 0, sizeof(vep));

   /* First build an array of pointers to ve's in vb.inputs_read
    */
   if (0)
      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
   
   while (tmp) {
      GLuint i = _mesa_ffsll(tmp)-1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      tmp &= ~(1u << i);  /* 1u avoids undefined behaviour when i == 31 */
      enabled[nr_enabled++] = input;

      input->index = i;
      input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
      input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;

      if (!input->glarray->BufferObj->Name) {
	 if (i == 0) {
	    /* Position array not properly enabled:
	     */
	    if (input->glarray->StrideB == 0)
	       return GL_FALSE;

	    interleave = input->glarray->StrideB;
	    ptr = input->glarray->Ptr;
	 }
	 else if (interleave != input->glarray->StrideB ||
		  (const char *)input->glarray->Ptr - (const char *)ptr < 0 ||
		  (const char *)input->glarray->Ptr - (const char *)ptr > interleave) {
	    interleave = 0;
	 }

	 upload[nr_uploads++] = input;
	 
	 /* We rebase drawing to start at element zero only when
	  * varyings are not in vbos, which means we can end up
	  * uploading non-varying arrays (stride != 0) when min_index
	  * is zero.  This doesn't matter as the amount to upload is
	  * the same for these arrays whether the draw call is rebased
	  * or not - we just have to upload the one element.
	  */
	 assert(min_index == 0 || input->glarray->StrideB == 0);
      }
   }

   /* Upload interleaved arrays if all uploads are interleaved
    */
   if (nr_uploads > 1 && 
       interleave && 
       interleave <= 256) {
      struct brw_vertex_element *input0 = upload[0];

      input0->glarray = copy_array_to_vbo_array(brw, 0,
						input0->glarray, 
						interleave,
						input0->count);

      for (i = 1; i < nr_uploads; i++) {
	 upload[i]->glarray = interleaved_vbo_array(brw,
						    i,
						    input0->glarray,
						    upload[i]->glarray,
						    ptr);
      }
   }
   else {
      for (i = 0; i < nr_uploads; i++) {
	 struct brw_vertex_element *input = upload[i];

	 input->glarray = copy_array_to_vbo_array(brw, i, 
						  input->glarray,
						  input->element_size,
						  input->count);

      }
   }

   /* XXX: In the rare cases where this happens we fallback all
    * the way to software rasterization, although a tnl fallback
    * would be sufficient.  I don't know of *any* real world
    * cases with > 17 vertex attributes enabled, so it probably
    * isn't an issue at this point.
    */
   if (nr_enabled >= BRW_VEP_MAX)
	 return GL_FALSE;

   /* This still defines a hardware VB for each input, even if they
    * are interleaved or from the same VBO.  TBD if this makes a
    * performance difference.
    */
   for (i = 0; i < nr_enabled; i++) {
      struct brw_vertex_element *input = enabled[i];

      input->vep = &vep.ve[i];
      input->vep->ve0.src_format = get_surface_type(input->glarray->Type, 
						    input->glarray->Size,
						    input->glarray->Normalized);
      input->vep->ve0.valid = 1;
      input->vep->ve1.dst_offset = (i) * 4;
      input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
      input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
      input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
      input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;

      /* Pad to vec4 with (..., 0, 1); each case intentionally falls through. */
      switch (input->glarray->Size) {
      case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;
         /* fall through */
      case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
         /* fall through */
      case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
         /* fall through */
      case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
	 break;
      }

      input->vep->ve0.vertex_buffer_index = i;
      input->vep->ve0.src_offset = 0;

      vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB;
      vbp.vb[i].vb0.bits.pad = 0;
      vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
      vbp.vb[i].vb0.bits.vb_index = i;
      vbp.vb[i].offset = (GLuint)input->glarray->Ptr;
      vbp.vb[i].buffer = array_buffer(input->glarray);
      vbp.vb[i].max_index = max_index;
   }



   /* Now emit VB and VEP state packets:
    */
   vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
   vbp.header.bits.opcode = CMD_VERTEX_BUFFER;

   BEGIN_BATCH(vbp.header.bits.length+2, 0);
   OUT_BATCH( vbp.header.dword );
   
   for (i = 0; i < nr_enabled; i++) {
      OUT_BATCH( vbp.vb[i].vb0.dword );
      OUT_BATCH( bmBufferOffset(&brw->intel, vbp.vb[i].buffer) + vbp.vb[i].offset);
      OUT_BATCH( vbp.vb[i].max_index );
      OUT_BATCH( vbp.vb[i].instance_data_step_rate );
   }
   ADVANCE_BATCH();

   vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2;
   vep.header.opcode = CMD_VERTEX_ELEMENT;
   brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));

   return GL_TRUE;
}
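The VFCOMPONENT switch near the end pads attributes with fewer than four components out to a vec4, matching GL's default attribute value of (x, y, 0, 1). A standalone sketch of the same fall-through fill, with hypothetical enum values standing in for the hardware encodings:

enum comp { STORE_SRC, STORE_0, STORE_1_FLT };

static void
pad_components(enum comp c[4], int size)
{
   c[0] = c[1] = c[2] = c[3] = STORE_SRC;
   switch (size) {
   case 0: c[0] = STORE_0;      /* fall through */
   case 1: c[1] = STORE_0;      /* fall through */
   case 2: c[2] = STORE_0;      /* fall through */
   case 3: c[3] = STORE_1_FLT;  /* w defaults to 1.0 */
      break;
   }
}

For size == 2, for example, the result is (x, y, 0, 1); size == 4 leaves all four components sourced from the array.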
Example #4
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                           ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint index = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[index];

      vs_inputs &= ~BITFIELD64_BIT(index);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);

         const uint32_t offset = (uintptr_t)glarray->Ptr;

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
         if (glarray->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glarray->StrideB * brw->baseinstance;
               range = (glarray->StrideB * ((brw->num_instances - 1) /
                                            glarray->InstanceDivisor) +
                        glarray->_ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glarray->StrideB;
               range = (glarray->StrideB * (max_index - min_index) +
                        glarray->_ElementSize);
            }
         }

	 /* If we have a VB set to be uploaded for this buffer object
	  * already, reuse that VB state so that we emit fewer
	  * relocations.
	  */
	 unsigned k;
	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
	    buffer->offset = offset;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;
            buffer->size = glarray->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

	    input->buffer = j++;
	    input->offset = 0;
	 }
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
	 {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range);
      drm_intel_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
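The [start, start + range) bookkeeping in the function above trims each buffer-object reference to the data the draw can actually fetch; an older revision of the same upload path follows below. A sketch of that range computation under hypothetical parameter names:

#include <stdint.h>

static void
attrib_range(uint32_t offset, uint32_t stride, uint32_t element_size,
             uint32_t divisor, uint32_t num_instances, uint32_t base_instance,
             int bounds_valid, uint32_t min_index, uint32_t max_index,
             uint32_t buffer_size, uint32_t *start, uint32_t *range)
{
   /* Worst case: reference the entire buffer object. */
   *start = 0;
   *range = buffer_size;

   if (divisor) {
      /* Instanced: the array is stepped once every `divisor` instances. */
      if (num_instances) {
         *start = offset + stride * base_instance;
         *range = stride * ((num_instances - 1) / divisor) + element_size;
      }
   } else if (bounds_valid) {
      /* Indexed: only elements in [min_index, max_index] can be fetched. */
      *start = offset + min_index * stride;
      *range = stride * (max_index - min_index) + element_size;
   }
}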
int brw_prepare_vertices( struct brw_context *brw,
			       GLuint min_index,
			       GLuint max_index )
{
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   GLuint tmp = brw->vs.prog_data->inputs_read; 
   GLuint i;
   const unsigned char *ptr = NULL;
   GLuint interleave = 0;
   int ret = 0;

   struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
   GLuint nr_enabled = 0;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read
    */
   if (0)
      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   while (tmp) {
      GLuint i = _mesa_ffsll(tmp)-1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      tmp &= ~(1u << i);  /* 1u avoids undefined behaviour when i == 31 */
      enabled[nr_enabled++] = input;
   }

   /* XXX: In the rare cases where this happens we fallback all
    * the way to software rasterization, although a tnl fallback
    * would be sufficient.  I don't know of *any* real world
    * cases with > 17 vertex attributes enabled, so it probably
    * isn't an issue at this point.
    */
   if (nr_enabled >= BRW_VEP_MAX)
       return -1;

   for (i = 0; i < nr_enabled; i++) {
      struct brw_vertex_element *input = enabled[i];

      input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
      input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;

      if (input->glarray->BufferObj->Name != 0) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(input->glarray->BufferObj);

	 /* Named buffer object: Just reference its contents directly. */
	 input->bo = intel_bufferobj_buffer(intel, intel_buffer,
					    INTEL_READ);
	 dri_bo_reference(input->bo);
	 input->offset = (unsigned long)input->glarray->Ptr;
	 input->stride = input->glarray->StrideB;

	 ret |= dri_bufmgr_check_aperture_space(input->bo);
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (i == 0) {
	    /* Position array not properly enabled:
	     */
	    if (input->glarray->StrideB == 0)
	      return -1;

	    interleave = input->glarray->StrideB;
	    ptr = input->glarray->Ptr;
	 }
	 else if (interleave != input->glarray->StrideB ||
		  (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
		  (const unsigned char *)input->glarray->Ptr - ptr > interleave)
	 {
	    interleave = 0;
	 }

	 upload[nr_uploads++] = input;
	 
	 /* We rebase drawing to start at element zero only when
	  * varyings are not in vbos, which means we can end up
	  * uploading non-varying arrays (stride != 0) when min_index
	  * is zero.  This doesn't matter as the amount to upload is
	  * the same for these arrays whether the draw call is rebased
	  * or not - we just have to upload the one element.
	  */
	 assert(min_index == 0 || input->glarray->StrideB == 0);
      }
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1 && interleave && interleave <= 256) {
      /* All uploads are interleaved, so upload the arrays together as
       * interleaved.  First, upload the contents and set up upload[0].
       */
      copy_array_to_vbo_array(brw, upload[0], interleave);

      ret |= dri_bufmgr_check_aperture_space(upload[0]->bo);
      for (i = 1; i < nr_uploads; i++) {
	 /* Then, just point upload[i] at upload[0]'s buffer. */
	 upload[i]->stride = interleave;
	 upload[i]->offset = upload[0]->offset +
	    ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	 upload[i]->bo = upload[0]->bo;
	 dri_bo_reference(upload[i]->bo);
      }
   }
   else {
      /* Upload non-interleaved arrays */
      for (i = 0; i < nr_uploads; i++) {
          copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
          if (upload[i]->bo) {
              ret |= dri_bufmgr_check_aperture_space(upload[i]->bo);
          }
      }
   }


   if (ret)
     return 1;


   return 0;
}
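All four revisions walk the `inputs_read` bitfield the same way: find the lowest set bit, set up that attribute, clear the bit, repeat. A self-contained demo of the pattern, assuming the GCC/Clang `__builtin_ffsll` in place of the driver's ffsll():

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   /* e.g. position (bit 0), normal (bit 2) and one generic attribute */
   uint64_t vs_inputs = (1ull << 0) | (1ull << 2) | (1ull << 16);

   while (vs_inputs) {
      unsigned i = __builtin_ffsll(vs_inputs) - 1;  /* index of lowest set bit */
      vs_inputs &= ~(1ull << i);                    /* clear it */
      printf("set up vertex element for attribute %u\n", i);
   }
   return 0;
}

The newer revisions clear bits with ~BITFIELD64_BIT(i) because inputs_read is 64 bits wide there; a plain `1 << i` only covers the first 32 attributes.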