void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, i, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the
    * data is passed sideband through the fixed function units.  So, we need
    * to prepare the vertex buffer for it, but it's not present in
    * inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);
         int k;

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = (uintptr_t)glarray->Ptr;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;

            uint32_t offset, size;
            if (glarray->InstanceDivisor) {
               offset = buffer->offset;
               size = (buffer->stride * ((brw->num_instances /
                                          glarray->InstanceDivisor) - 1) +
                       glarray->_ElementSize);
            } else {
               if (min_index == -1) {
                  offset = 0;
                  size = intel_buffer->Base.Size;
               } else {
                  offset = buffer->offset + min_index * buffer->stride;
                  size = (buffer->stride * (max_index - min_index) +
                          glarray->_ElementSize);
               }
            }
            buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
                                                offset, size);
            drm_intel_bo_reference(buffer->bo);

            input->buffer = j++;
            input->offset = 0;
         }

         /* This is a common place to reach if the user mistakenly supplies
          * a pointer in place of a VBO offset.  If we just let it go through,
          * we may end up dereferencing a pointer beyond the bounds of the
          * GTT.  We would hope that the VBO's max_index would save us, but
          * Mesa appears to hand us min/max values not clipped to the
          * array object's _MaxElement, and _MaxElement frequently appears
          * to be wrong anyway.
          *
          * The VBO spec allows application termination in this case, and
          * it's probably a service to the poor programmer to do so rather
          * than trying to just not render.
          */
         assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize >
                  interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single
             * vertex and, for example, the data is stored on the
             * application's stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *       float data[...];
             *       glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *       glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
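/* Editor's sketch, not part of the driver: the interleaving test in the
 * upload pass above can be read as a standalone predicate.  A later
 * client-memory attribute may share the first attribute's single
 * interleaved upload only if it uses the same stride, starts at or after
 * the first attribute's base pointer, and its element ends within one
 * stride of that base.  The helper name and parameters below are
 * illustrative only.
 */
static inline bool
attrib_fits_interleave(const unsigned char *base, GLuint base_stride,
                       const unsigned char *ptr, GLuint stride,
                       GLuint element_size)
{
   return stride == base_stride &&
          ptr >= base &&
          (uintptr_t)(ptr - base) + element_size <= base_stride;
}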
static void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0, total_size = 0;
   unsigned int min_index = brw->vb.min_index;
   unsigned int max_index = brw->vb.max_index;
   int delta, i, j;
   GLboolean can_merge_uploads = GL_TRUE;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read */
   if (0)
      printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      if (input->glarray->Size && get_size(input->glarray->Type))
         brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      goto prepare;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;
      int type_size = get_size(glarray->Type);

      input->element_size = type_size * glarray->Size;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);
         int k;

         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->bo = intel_bufferobj_source(intel,
                                                intel_buffer, type_size,
                                                &buffer->offset);
            drm_intel_bo_reference(buffer->bo);
            buffer->offset += (uintptr_t)glarray->Ptr;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;

            input->buffer = j++;
            input->offset = 0;
         }

         /* This is a common place to reach if the user mistakenly supplies
          * a pointer in place of a VBO offset.  If we just let it go through,
          * we may end up dereferencing a pointer beyond the bounds of the
          * GTT.  We would hope that the VBO's max_index would save us, but
          * Mesa appears to hand us min/max values not clipped to the
          * array object's _MaxElement, and _MaxElement frequently appears
          * to be wrong anyway.
          *
          * The VBO spec allows application termination in this case, and
          * it's probably a service to the poor programmer to do so rather
          * than trying to just not render.
          */
         assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            /* Position array not properly enabled: */
            if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) {
               intel->Fallback = true; /* boolean, not bitfield */
               return;
            }

            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  (uintptr_t)(glarray->Ptr - ptr) > interleaved)
         {
            interleaved = 0;
         }
         else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size - 1)) {
            /* enforce natural alignment (for doubles) */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;

         total_size = ALIGN(total_size, type_size);
         total_size += input->element_size;

         if (glarray->InstanceDivisor != 0) {
            can_merge_uploads = GL_FALSE;
         }
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }
   if (delta && !brw->intel.intelScreen->relaxed_relocations)
      min_index = delta = 0;

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved && interleaved <= 2 * total_size) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
      else if ((total_size < 2048) && can_merge_uploads) {
         /* Upload non-interleaved arrays into a single interleaved array */
         struct brw_vertex_buffer *buffer;
         int count = MAX2(max_index - min_index + 1, 1);
         int offset;
         char *map;

         map = intel_upload_map(&brw->intel, total_size * count, total_size);
         for (i = offset = 0; i < nr_uploads; i++) {
            const unsigned char *src = upload[i]->glarray->Ptr;
            int size = upload[i]->element_size;
            int stride = upload[i]->glarray->StrideB;
            char *dst;
            int n;

            offset = ALIGN(offset, get_size(upload[i]->glarray->Type));
            dst = map + offset;
            src += min_index * stride;

            for (n = 0; n < count; n++) {
               memcpy(dst, src, size);
               src += stride;
               dst += total_size;
            }

            upload[i]->offset = offset;
            upload[i]->buffer = j;

            offset += size;
         }
         assert(offset == total_size);
         buffer = &brw->vb.buffers[j++];
         intel_upload_unmap(&brw->intel, map, offset * count, offset,
                            &buffer->bo, &buffer->offset);
         buffer->stride = offset;
         buffer->step_rate = 0;
         buffer->offset -= delta * offset;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->element_size);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->element_size);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   /* can we simply extend the current vb? */
   if (j == brw->vb.nr_current_buffers) {
      int delta = 0;
      for (i = 0; i < j; i++) {
         int d;

         if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
             brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
             brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
            break;

         d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
         if (d < 0)
            break;
         if (i == 0)
            delta = d / brw->vb.current_buffers[i].stride;
         if (delta * brw->vb.current_buffers[i].stride != d)
            break;
      }

      if (i == j) {
         brw->vb.start_vertex_bias += delta;
         while (--j >= 0)
            drm_intel_bo_unreference(brw->vb.buffers[j].bo);
         j = 0;
      }
   }

   brw->vb.nr_buffers = j;

prepare:
   brw_prepare_query_begin(brw);
}
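/* Editor's note, illustrative only: the "merge non-interleaved uploads into a
 * single interleaved array" path above packs each uploaded attribute at an
 * offset aligned to its component type size and uses total_size as the
 * resulting per-vertex stride.  For example, a 3-component float attribute
 * followed by a 2-component double attribute would be packed as:
 *
 *    attr 0: 3 * sizeof(float)  = 12 bytes at offset  0
 *    attr 1: 2 * sizeof(double) = 16 bytes at offset 16  (12 aligned up to 8)
 *    stride (total_size)        = 32 bytes per vertex
 *
 * which is why the loop re-runs ALIGN() per attribute and asserts that the
 * final offset equals total_size.
 */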
GLboolean
brw_upload_vertices(struct brw_context *brw,
                    GLuint min_index,
                    GLuint max_index)
{
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   GLuint tmp = brw->vs.prog_data->inputs_read;
   struct brw_vertex_element_packet vep;
   struct brw_array_state vbp;
   GLuint i;
   const void *ptr = NULL;
   GLuint interleave = 0;

   struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
   GLuint nr_enabled = 0;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   memset(&vbp, 0, sizeof(vbp));
   memset(&vep, 0, sizeof(vep));

   /* First build an array of pointers to ve's in vb.inputs_read */
   if (0)
      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   while (tmp) {
      GLuint i = _mesa_ffsll(tmp) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      tmp &= ~(1 << i);
      enabled[nr_enabled++] = input;

      input->index = i;
      input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
      input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;

      if (!input->glarray->BufferObj->Name) {
         if (i == 0) {
            /* Position array not properly enabled: */
            if (input->glarray->StrideB == 0)
               return GL_FALSE;

            interleave = input->glarray->StrideB;
            ptr = input->glarray->Ptr;
         }
         else if (interleave != input->glarray->StrideB ||
                  (const char *)input->glarray->Ptr - (const char *)ptr < 0 ||
                  (const char *)input->glarray->Ptr - (const char *)ptr > interleave)
         {
            interleave = 0;
         }

         upload[nr_uploads++] = input;

         /* We rebase drawing to start at element zero only when
          * varyings are not in vbos, which means we can end up
          * uploading non-varying arrays (stride != 0) when min_index
          * is zero.  This doesn't matter as the amount to upload is
          * the same for these arrays whether the draw call is rebased
          * or not - we just have to upload the one element.
          */
         assert(min_index == 0 || input->glarray->StrideB == 0);
      }
   }

   /* Upload interleaved arrays if all uploads are interleaved */
   if (nr_uploads > 1 &&
       interleave &&
       interleave <= 256) {
      struct brw_vertex_element *input0 = upload[0];

      input0->glarray = copy_array_to_vbo_array(brw, 0,
                                                input0->glarray,
                                                interleave,
                                                input0->count);

      for (i = 1; i < nr_uploads; i++) {
         upload[i]->glarray = interleaved_vbo_array(brw,
                                                    i,
                                                    input0->glarray,
                                                    upload[i]->glarray,
                                                    ptr);
      }
   }
   else {
      for (i = 0; i < nr_uploads; i++) {
         struct brw_vertex_element *input = upload[i];

         input->glarray = copy_array_to_vbo_array(brw, i,
                                                  input->glarray,
                                                  input->element_size,
                                                  input->count);
      }
   }

   /* XXX: In the rare cases where this happens we fallback all
    * the way to software rasterization, although a tnl fallback
    * would be sufficient.  I don't know of *any* real world
    * cases with > 17 vertex attributes enabled, so it probably
    * isn't an issue at this point.
    */
   if (nr_enabled >= BRW_VEP_MAX)
      return GL_FALSE;

   /* This still defines a hardware VB for each input, even if they
    * are interleaved or from the same VBO.  TBD if this makes a
    * performance difference.
    */
   for (i = 0; i < nr_enabled; i++) {
      struct brw_vertex_element *input = enabled[i];

      input->vep = &vep.ve[i];
      input->vep->ve0.src_format = get_surface_type(input->glarray->Type,
                                                    input->glarray->Size,
                                                    input->glarray->Normalized);
      input->vep->ve0.valid = 1;
      input->vep->ve1.dst_offset = (i) * 4;
      input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
      input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
      input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
      input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;

      switch (input->glarray->Size) {
      case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;
      case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
      case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
      case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
         break;
      }

      input->vep->ve0.vertex_buffer_index = i;
      input->vep->ve0.src_offset = 0;

      vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB;
      vbp.vb[i].vb0.bits.pad = 0;
      vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
      vbp.vb[i].vb0.bits.vb_index = i;
      vbp.vb[i].offset = (GLuint)input->glarray->Ptr;
      vbp.vb[i].buffer = array_buffer(input->glarray);
      vbp.vb[i].max_index = max_index;
   }

   /* Now emit VB and VEP state packets:
    */
   vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
   vbp.header.bits.opcode = CMD_VERTEX_BUFFER;

   BEGIN_BATCH(vbp.header.bits.length + 2, 0);
   OUT_BATCH(vbp.header.dword);

   for (i = 0; i < nr_enabled; i++) {
      OUT_BATCH(vbp.vb[i].vb0.dword);
      OUT_BATCH(bmBufferOffset(&brw->intel, vbp.vb[i].buffer) +
                vbp.vb[i].offset);
      OUT_BATCH(vbp.vb[i].max_index);
      OUT_BATCH(vbp.vb[i].instance_data_step_rate);
   }
   ADVANCE_BATCH();

   vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0]) / 4) - 2;
   vep.header.opcode = CMD_VERTEX_ELEMENT;
   brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));
   return GL_TRUE;
}
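/* Editor's note, illustrative only: the fall-through switch above implements
 * the usual default expansion for arrays with fewer than four components.
 * A 2-component array is fetched as (x, y, 0, 1.0f), a 3-component array as
 * (x, y, z, 1.0f), and a 4-component array is passed through unchanged, since
 * only the missing components are overridden with STORE_0 / STORE_1_FLT.
 */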
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the
    * data is passed sideband through the fixed function units.  So, we need
    * to prepare the vertex buffer for it, but it's not present in
    * inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint index = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[index];

      vs_inputs &= ~BITFIELD64_BIT(index);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);

         const uint32_t offset = (uintptr_t)glarray->Ptr;

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
         if (glarray->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glarray->StrideB * brw->baseinstance;
               range = (glarray->StrideB * ((brw->num_instances - 1) /
                                            glarray->InstanceDivisor) +
                        glarray->_ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glarray->StrideB;
               range = (glarray->StrideB * (max_index - min_index) +
                        glarray->_ElementSize);
            }
         }

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         unsigned k;
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = offset;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;
            buffer->size = glarray->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

            input->buffer = j++;
            input->offset = 0;
         }
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize >
                  interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single
             * vertex and, for example, the data is stored on the
             * application's stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *       float data[...];
             *       glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *       glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range);
      drm_intel_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
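/* Editor's note, illustrative only: for an instanced attribute the highest
 * array element ever fetched is (num_instances - 1) / InstanceDivisor, since
 * the element only advances once every InstanceDivisor instances.  For
 * example, with num_instances = 10 and InstanceDivisor = 4, instances 0-3
 * read element 0, instances 4-7 read element 1, and instances 8-9 read
 * element 2, so instanced_attr_max_index = (10 - 1) / 4 = 2 and only
 * elements [0, 2] need to be uploaded.
 */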
int
brw_prepare_vertices(struct brw_context *brw,
                     GLuint min_index,
                     GLuint max_index)
{
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   GLuint tmp = brw->vs.prog_data->inputs_read;
   GLuint i;
   const unsigned char *ptr = NULL;
   GLuint interleave = 0;
   int ret = 0;

   struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
   GLuint nr_enabled = 0;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read */
   if (0)
      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   while (tmp) {
      GLuint i = _mesa_ffsll(tmp) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      tmp &= ~(1 << i);
      enabled[nr_enabled++] = input;
   }

   /* XXX: In the rare cases where this happens we fallback all
    * the way to software rasterization, although a tnl fallback
    * would be sufficient.  I don't know of *any* real world
    * cases with > 17 vertex attributes enabled, so it probably
    * isn't an issue at this point.
    */
   if (nr_enabled >= BRW_VEP_MAX)
      return -1;

   for (i = 0; i < nr_enabled; i++) {
      struct brw_vertex_element *input = enabled[i];

      input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
      input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;

      if (input->glarray->BufferObj->Name != 0) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(input->glarray->BufferObj);

         /* Named buffer object: Just reference its contents directly. */
         input->bo = intel_bufferobj_buffer(intel, intel_buffer, INTEL_READ);
         dri_bo_reference(input->bo);
         input->offset = (unsigned long)input->glarray->Ptr;
         input->stride = input->glarray->StrideB;

         ret |= dri_bufmgr_check_aperture_space(input->bo);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (i == 0) {
            /* Position array not properly enabled: */
            if (input->glarray->StrideB == 0)
               return -1;

            interleave = input->glarray->StrideB;
            ptr = input->glarray->Ptr;
         }
         else if (interleave != input->glarray->StrideB ||
                  (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
                  (const unsigned char *)input->glarray->Ptr - ptr > interleave)
         {
            interleave = 0;
         }

         upload[nr_uploads++] = input;

         /* We rebase drawing to start at element zero only when
          * varyings are not in vbos, which means we can end up
          * uploading non-varying arrays (stride != 0) when min_index
          * is zero.  This doesn't matter as the amount to upload is
          * the same for these arrays whether the draw call is rebased
          * or not - we just have to upload the one element.
          */
         assert(min_index == 0 || input->glarray->StrideB == 0);
      }
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1 &&
       interleave &&
       interleave <= 256) {
      /* All uploads are interleaved, so upload the arrays together as
       * interleaved.  First, upload the contents and set up upload[0].
       */
      copy_array_to_vbo_array(brw, upload[0], interleave);

      ret |= dri_bufmgr_check_aperture_space(upload[0]->bo);
      for (i = 1; i < nr_uploads; i++) {
         /* Then, just point upload[i] at upload[0]'s buffer. */
         upload[i]->stride = interleave;
         upload[i]->offset = upload[0]->offset +
            ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
         upload[i]->bo = upload[0]->bo;
         dri_bo_reference(upload[i]->bo);
      }
   }
   else {
      /* Upload non-interleaved arrays */
      for (i = 0; i < nr_uploads; i++) {
         copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
         if (upload[i]->bo) {
            ret |= dri_bufmgr_check_aperture_space(upload[i]->bo);
         }
      }
   }

   if (ret)
      return 1;

   return 0;
}