static void
intel_bufferobj_copy_subdata(GLcontext *ctx,
                             struct gl_buffer_object *src,
                             struct gl_buffer_object *dst,
                             GLintptr read_offset, GLintptr write_offset,
                             GLsizeiptr size)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_src = intel_buffer_object(src);
   struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
   drm_intel_bo *src_bo, *dst_bo;

   if (size == 0)
      return;

   /* If we're in system memory, just map and memcpy. */
   if (intel_src->sys_buffer || intel_dst->sys_buffer) {
      /* The same buffer may be used, but note that regions copied may
       * not overlap.
       */
      if (src == dst) {
         char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
                                         GL_READ_WRITE, dst);
         memcpy(ptr + write_offset, ptr + read_offset, size);
         intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
      } else {
         const char *src_ptr;
         char *dst_ptr;

         src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
                                       GL_READ_ONLY, src);
         dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
                                       GL_WRITE_ONLY, dst);

         memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);

         intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
         intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
      }
      /* The copy is done; don't fall through to the blit path below. */
      return;
   }

   /* Otherwise, we have real BOs, so blit them. */
   dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
   src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ);

   intel_emit_linear_blit(intel,
                          dst_bo, write_offset,
                          src_bo, read_offset, size);

   /* Since we've emitted some blits to buffers that will (likely) be used
    * in rendering operations in other cache domains in this batch, emit a
    * flush.  Once again, we wish for a domain tracker in libdrm to cover
    * usage inside of a batchbuffer.
    */
   intel_batchbuffer_emit_mi_flush(intel->batch);
}
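/* For reference, the GL entry point that lands in the hook above is
 * glCopyBufferSubData (GL_ARB_copy_buffer).  A minimal client-side sketch,
 * not driver code; the buffer name and sizes are made up.  Copying within a
 * single buffer is legal as long as the ranges don't overlap, which is why
 * the src == dst path above can map the buffer once with GL_READ_WRITE.
 */
static void
copy_within_one_buffer(GLuint buf)
{
   glBindBuffer(GL_COPY_READ_BUFFER, buf);
   glBindBuffer(GL_COPY_WRITE_BUFFER, buf);

   /* Copy 256 bytes from offset 0 to offset 1024 (non-overlapping). */
   glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
                       0, 1024, 256);
}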
/* Attach to a pbo, discarding our data.  Effectively zero-copy upload
 * the pbo's data.
 */
void
intel_region_attach_pbo(struct intel_context *intel,
                        struct intel_region *region,
                        struct intel_buffer_object *pbo)
{
   dri_bo *buffer;

   if (region->pbo == pbo)
      return;

   _DBG("%s %p %p\n", __FUNCTION__, region, pbo);

   /* If there is already a pbo attached, break the cow tie now.
    * Don't call intel_region_release_pbo() as that would
    * unnecessarily allocate a new buffer we would have to immediately
    * discard.
    */
   if (region->pbo) {
      region->pbo->region = NULL;
      region->pbo = NULL;
   }

   if (region->buffer) {
      dri_bo_unreference(region->buffer);
      region->buffer = NULL;
   }

   /* Make sure the pbo has a buffer of its own. */
   buffer = intel_bufferobj_buffer(intel, pbo, INTEL_WRITE_FULL);

   region->pbo = pbo;
   region->pbo->region = region;
   dri_bo_reference(buffer);
   region->buffer = buffer;
}
static GLenum
intel_buffer_object_purgeable(struct gl_context *ctx,
                              struct gl_buffer_object *obj,
                              GLenum option)
{
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   if (intel_obj->buffer != NULL)
      return intel_buffer_purgeable(intel_obj->buffer);

   if (option == GL_RELEASED_APPLE) {
      if (intel_obj->sys_buffer != NULL) {
         free(intel_obj->sys_buffer);
         intel_obj->sys_buffer = NULL;
      }

      return GL_RELEASED_APPLE;
   } else {
      /* XXX Create the buffer and madvise(MADV_DONTNEED)? */
      struct intel_context *intel = intel_context(ctx);
      drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_obj, INTEL_READ);

      return intel_buffer_purgeable(bo);
   }
}
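/* Client-side view of the hook above, from GL_APPLE_object_purgeable.  A
 * sketch assuming a context that exposes the extension; not driver code.
 * GL_RELEASED_APPLE tells the driver it may drop the backing store
 * immediately, which is why the code above can simply free sys_buffer.
 */
static void
mark_buffer_purgeable(GLuint buf)
{
   GLenum state = glObjectPurgeableAPPLE(GL_BUFFER_OBJECT_APPLE, buf,
                                         GL_RELEASED_APPLE);
   /* The return value reports what the driver actually did:
    * GL_VOLATILE_APPLE or GL_RELEASED_APPLE.
    */
   (void) state;
}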
void
brw_draw_init(struct brw_context *brw)
{
   GLcontext *ctx = &brw->intel.ctx;
   struct vbo_context *vbo = vbo_context(ctx);
   GLuint i;

   /* Register our drawing function:
    */
   vbo->draw_prims = brw_draw_prims;

   brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE;

   for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) {
      brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1,
                                                          GL_ARRAY_BUFFER_ARB);

      /* NOTE: These are set to no-backing-store. */
      bmBufferSetInvalidateCB(&brw->intel,
                              intel_bufferobj_buffer(
                                 intel_buffer_object(brw->vb.upload.vbo[i])),
                              brw_invalidate_vbo_cb,
                              &brw->intel,
                              GL_TRUE);
   }

   ctx->Driver.BufferData(ctx,
                          GL_ARRAY_BUFFER_ARB,
                          BRW_UPLOAD_INIT_SIZE,
                          NULL,
                          GL_DYNAMIC_DRAW_ARB,
                          brw->vb.upload.vbo[0]);
}
static bool
intel_set_texture_storage_for_buffer_object(struct gl_context *ctx,
                                            struct gl_texture_object *tex_obj,
                                            struct gl_buffer_object *buffer_obj,
                                            uint32_t buffer_offset,
                                            uint32_t row_stride,
                                            bool read_only)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *intel_texobj = intel_texture_object(tex_obj);
   struct gl_texture_image *image = tex_obj->Image[0][0];
   struct intel_texture_image *intel_image = intel_texture_image(image);
   struct intel_buffer_object *intel_buffer_obj =
      intel_buffer_object(buffer_obj);

   if (!read_only) {
      /* Renderbuffers have the restriction that the buffer offset and
       * surface pitch must be a multiple of the element size.  If it's
       * not, we have to fail and fall back to software.
       */
      int cpp = _mesa_get_format_bytes(image->TexFormat);
      if (buffer_offset % cpp || row_stride % cpp) {
         perf_debug("Bad PBO alignment; fallback to CPU mapping\n");
         return false;
      }

      if (!brw->format_supported_as_render_target[image->TexFormat]) {
         perf_debug("Non-renderable PBO format; fallback to CPU mapping\n");
         return false;
      }
   }

   assert(intel_texobj->mt == NULL);
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_buffer_obj,
                                             buffer_offset,
                                             row_stride * image->Height);
   intel_texobj->mt = intel_miptree_create_for_bo(brw, bo,
                                                  image->TexFormat,
                                                  buffer_offset,
                                                  image->Width,
                                                  image->Height,
                                                  image->Depth,
                                                  row_stride,
                                                  0);
   if (!intel_texobj->mt)
      return false;

   if (!_swrast_init_texture_image(image))
      return false;

   intel_miptree_reference(&intel_image->mt, intel_texobj->mt);

   /* The miptree is in a validated state, so no need to check later. */
   intel_texobj->needs_validate = false;
   intel_texobj->validated_first_level = 0;
   intel_texobj->validated_last_level = 0;
   intel_texobj->_Format = intel_texobj->mt->format;

   return true;
}
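/* A minimal restatement of the alignment rule enforced above: when the
 * buffer will be written (read_only == false), both the buffer offset and
 * the row stride must be multiples of the element size, or the driver falls
 * back to a CPU mapping.  The helper and example values are hypothetical,
 * for illustration only.
 */
static bool
pbo_offsets_renderable(uint32_t buffer_offset, uint32_t row_stride, int cpp)
{
   /* e.g. with cpp == 4 (an RGBA8888 format): offset 64, stride 1024 pass;
    * offset 66, stride 1022 fail.
    */
   return buffer_offset % cpp == 0 && row_stride % cpp == 0;
}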
static void
gen8_upload_3dstate_so_buffers(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) xfb_obj;

   /* Set up the up to 4 output buffers.  These are the ranges defined in
    * the gl_transform_feedback_object.
    */
   for (int i = 0; i < 4; i++) {
      struct intel_buffer_object *bufferobj =
         intel_buffer_object(xfb_obj->Buffers[i]);

      if (!bufferobj) {
         BEGIN_BATCH(8);
         OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2));
         OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
         continue;
      }

      uint32_t start = xfb_obj->Offset[i];
      assert(start % 4 == 0);
      uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, bufferobj, start, end - start);
      assert(end <= bo->size);

      perf_debug("Missing MOCS setup for 3DSTATE_SO_BUFFER.");

      BEGIN_BATCH(8);
      OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2));
      OUT_BATCH(GEN8_SO_BUFFER_ENABLE | (i << SO_BUFFER_INDEX_SHIFT) |
                GEN8_SO_BUFFER_OFFSET_WRITE_ENABLE |
                GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE |
                (BDW_MOCS_WB << 22));
      OUT_RELOC64(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
      OUT_BATCH(xfb_obj->Size[i] / 4 - 1);
      OUT_RELOC64(brw_obj->offset_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  i * sizeof(uint32_t));
      if (brw_obj->zero_offsets) {
         /* Zero out the offset and write that to offset_bo */
         OUT_BATCH(0);
      } else {
         /* Use offset_bo as the "Stream Offset." */
         OUT_BATCH(0xFFFFFFFF);
      }
      ADVANCE_BATCH();
   }
   brw_obj->zero_offsets = false;
}
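/* The Offset[i]/Size[i] ranges consumed above are established by ordinary
 * client code.  A sketch (the buffer size is arbitrary); the 4-byte
 * alignment glBindBufferRange requires for this target matches the
 * assert(start % 4 == 0) above.
 */
static void
bind_xfb_output_range(void)
{
   GLuint xfb_buf;

   glGenBuffers(1, &xfb_buf);
   glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, xfb_buf);
   glBufferData(GL_TRANSFORM_FEEDBACK_BUFFER, 4096, NULL, GL_DYNAMIC_COPY);

   /* Bind bytes [0, 4096) of xfb_buf as transform feedback output 0. */
   glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, xfb_buf, 0, 4096);
}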
/* XXX: Do this for TexSubImage also:
 */
static GLboolean
try_pbo_upload(struct intel_context *intel,
               struct intel_texture_image *intelImage,
               const struct gl_pixelstore_attrib *unpack,
               GLint internalFormat,
               GLint width, GLint height,
               GLenum format, GLenum type, const void *pixels)
{
   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
   GLuint src_offset, src_stride;
   GLuint dst_offset, dst_stride;

   if (!pbo ||
       intel->ctx._ImageTransferState ||
       unpack->SkipPixels || unpack->SkipRows) {
      _mesa_printf("%s: failure 1\n", __FUNCTION__);
      return GL_FALSE;
   }

   src_offset = (GLuint) pixels;

   if (unpack->RowLength > 0)
      src_stride = unpack->RowLength;
   else
      src_stride = width;

   dst_offset = intel_miptree_image_offset(intelImage->mt,
                                           intelImage->face,
                                           intelImage->level);

   dst_stride = intelImage->mt->pitch;

   intelFlush(&intel->ctx);
   LOCK_HARDWARE(intel);
   {
      struct _DriBufferObject *src_buffer =
         intel_bufferobj_buffer(intel, pbo, INTEL_READ);
      struct _DriBufferObject *dst_buffer =
         intel_region_buffer(intel->intelScreen, intelImage->mt->region,
                             INTEL_WRITE_FULL);

      intelEmitCopyBlit(intel,
                        intelImage->mt->cpp,
                        src_stride, src_buffer, src_offset,
                        dst_stride, dst_buffer, dst_offset,
                        0, 0, 0, 0, width, height, GL_COPY);

      intel_batchbuffer_flush(intel->batch);
   }
   UNLOCK_HARDWARE(intel);

   return GL_TRUE;
}
static void
upload_3dstate_so_buffers(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VERTEX_PROGRAM */
   const struct gl_shader_program *vs_prog =
      ctx->Shader.CurrentVertexProgram;
   const struct gl_transform_feedback_info *linked_xfb_info =
      &vs_prog->LinkedTransformFeedback;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
   int i;

   /* Set up the up to 4 output buffers.  These are the ranges defined in
    * the gl_transform_feedback_object.
    */
   for (i = 0; i < 4; i++) {
      struct intel_buffer_object *bufferobj =
         intel_buffer_object(xfb_obj->Buffers[i]);
      drm_intel_bo *bo;
      uint32_t start, end;
      uint32_t stride;

      if (!xfb_obj->Buffers[i]) {
         /* The pitch of 0 in this command indicates that the buffer is
          * unbound and won't be written to.
          */
         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
         OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
         continue;
      }

      bo = intel_bufferobj_buffer(brw, bufferobj, INTEL_WRITE_PART);
      stride = linked_xfb_info->BufferStride[i] * 4;

      start = xfb_obj->Offset[i];
      assert(start % 4 == 0);
      end = ALIGN(start + xfb_obj->Size[i], 4);
      assert(end <= bo->size);

      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
      OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT) | stride);
      OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
      OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, end);
      ADVANCE_BATCH();
   }
}
int
brw_prepare_indices(struct brw_context *brw,
                    const struct _mesa_index_buffer *index_buffer,
                    dri_bo **bo_return,
                    GLuint *offset_return)
{
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
   GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
   dri_bo *bo;
   struct gl_buffer_object *bufferobj = index_buffer->obj;
   GLuint offset = (GLuint) index_buffer->ptr;
   int ret;

   /* Turn into a proper VBO:
    */
   if (!bufferobj->Name) {
      /* Get new bufferobj, offset:
       */
      get_space(brw, ib_size, &bo, &offset);

      /* Straight upload
       */
      dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
   } else {
      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
      if ((get_size(index_buffer->type) - 1) & offset) {
         GLubyte *map = ctx->Driver.MapBuffer(ctx,
                                              GL_ELEMENT_ARRAY_BUFFER_ARB,
                                              GL_DYNAMIC_DRAW_ARB,
                                              bufferobj);
         map += offset;

         get_space(brw, ib_size, &bo, &offset);

         dri_bo_subdata(bo, offset, ib_size, map);

         ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
      } else {
         bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
                                     INTEL_READ);
         dri_bo_reference(bo);
      }
   }

   *bo_return = bo;
   *offset_return = offset;
   ret = dri_bufmgr_check_aperture_space(bo);
   return ret;
}
static void
brw_upload_indices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   drm_intel_bo *old_bo = brw->ib.bo;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = _mesa_sizeof_type(index_buffer->type);
   ib_size = ib_type_size * index_buffer->count;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!_mesa_is_bufferobj(bufferobj)) {
      /* Get new bufferobj, offset:
       */
      intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
                        &brw->ib.bo, &offset);
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
      if ((ib_type_size - 1) & offset) {
         perf_debug("copying index buffer to a temporary to work around "
                    "misaligned offset %d\n", offset);

         GLubyte *map = ctx->Driver.MapBufferRange(ctx, offset, ib_size,
                                                   GL_MAP_READ_BIT,
                                                   bufferobj, MAP_INTERNAL);

         intel_upload_data(brw, map, ib_size, ib_type_size,
                           &brw->ib.bo, &offset);

         ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL);
      } else {
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
                                   offset, ib_size);
         if (bo != brw->ib.bo) {
            drm_intel_bo_unreference(brw->ib.bo);
            brw->ib.bo = bo;
            drm_intel_bo_reference(bo);
         }
      }
   }

   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
    * the index buffer state when we're just moving the start index
    * of our drawing.
    */
   brw->ib.start_vertex_offset = offset / ib_type_size;

   if (brw->ib.bo != old_bo)
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;

   if (index_buffer->type != brw->ib.type) {
      brw->ib.type = index_buffer->type;
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   }
}
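/* Client-side sketch of the case the rebase path above handles: an element
 * array buffer used with an offset that is not a multiple of the index
 * size.  The values are made up for illustration.
 */
static void
draw_with_misaligned_index_offset(void)
{
   /* GL_UNSIGNED_INT indices are 4 bytes, so a byte offset of 6 is
    * misaligned; the driver copies the range into a temporary upload
    * buffer instead of using the bound buffer in place.
    */
   glDrawElements(GL_TRIANGLES, 36, GL_UNSIGNED_INT, (const void *) 6);
}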
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, i, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the
    * data is passed sideband through the fixed function units.  So, we need
    * to prepare the vertex buffer for it, but it's not present in
    * inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);
         int k;

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = (uintptr_t)glarray->Ptr;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;

            uint32_t offset, size;
            if (glarray->InstanceDivisor) {
               offset = buffer->offset;
               size = (buffer->stride *
                       ((brw->num_instances / glarray->InstanceDivisor) - 1) +
                       glarray->_ElementSize);
            } else {
               if (min_index == -1) {
                  offset = 0;
                  size = intel_buffer->Base.Size;
               } else {
                  offset = buffer->offset + min_index * buffer->stride;
                  size = (buffer->stride * (max_index - min_index) +
                          glarray->_ElementSize);
               }
            }
            buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
                                                offset, size);
            drm_intel_bo_reference(buffer->bo);

            input->buffer = j++;
            input->offset = 0;
         }

         /* This is a common place to reach if the user mistakenly supplies
          * a pointer in place of a VBO offset.  If we just let it go through,
          * we may end up dereferencing a pointer beyond the bounds of the
          * GTT.  We would hope that the VBO's max_index would save us, but
          * Mesa appears to hand us min/max values not clipped to the
          * array object's _MaxElement, and _MaxElement frequently appears
          * to be wrong anyway.
          *
          * The VBO spec allows application termination in this case, and
          * it's probably a service to the poor programmer to do so rather
          * than trying to just not render.
          */
         assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize >
                  interleaved) {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single
             * vertex and, for example, the data is stored on the
             * application's stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
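/* The mistake described in the long comment above, as a client-side sketch:
 * once a VBO is bound, the last argument of glVertexAttribPointer is a byte
 * offset into the buffer, not a client pointer.  Hypothetical code, for
 * illustration only.
 */
static void
vbo_offset_pitfall(GLuint vbo, const float *client_data)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);

   /* Correct: attribute 0 starts 16 bytes into the VBO. */
   glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 32, (const void *) 16);

   /* Wrong: a client pointer reinterpreted as a huge offset -- exactly
    * what the assert on bo->size above is meant to catch.
    */
   glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 32, client_data);
}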
static bool
do_blit_drawpixels(struct gl_context *ctx,
                   GLint x, GLint y, GLsizei width, GLsizei height,
                   GLenum format, GLenum type,
                   const struct gl_pixelstore_attrib *unpack,
                   const GLvoid *pixels)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
   GLuint src_offset;
   drm_intel_bo *src_buffer;

   DBG("%s\n", __FUNCTION__);

   if (!intel_check_blit_fragment_ops(ctx, false))
      return false;

   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
      DBG("%s: fallback due to MRT\n", __FUNCTION__);
      return false;
   }

   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);

   if (!_mesa_format_matches_format_and_type(irb->mt->format, format, type,
                                             false)) {
      DBG("%s: bad format for blit\n", __FUNCTION__);
      return false;
   }

   if (unpack->SwapBytes || unpack->LsbFirst ||
       unpack->SkipPixels || unpack->SkipRows) {
      DBG("%s: bad packing params\n", __FUNCTION__);
      return false;
   }

   int src_stride = _mesa_image_row_stride(unpack, width, format, type);
   bool src_flip = false;
   /* Mesa flips the src_stride for unpack->Invert, but we want our mt to
    * have a normal src_stride.
    */
   if (unpack->Invert) {
      src_stride = -src_stride;
      src_flip = true;
   }

   src_offset = (GLintptr) pixels;
   src_offset += _mesa_image_offset(2, unpack, width, height,
                                    format, type, 0, 0, 0);

   intel_prepare_render(brw);

   src_buffer = intel_bufferobj_buffer(brw, src, src_offset,
                                       width * height * irb->mt->cpp);

   struct intel_mipmap_tree *pbo_mt =
      intel_miptree_create_for_bo(brw,
                                  src_buffer,
                                  irb->mt->format,
                                  src_offset,
                                  width, height,
                                  src_stride,
                                  I915_TILING_NONE);
   if (!pbo_mt)
      return false;

   if (!intel_miptree_blit(brw,
                           pbo_mt, 0, 0,
                           0, 0, src_flip,
                           irb->mt, irb->mt_level, irb->mt_layer,
                           x, y, _mesa_is_winsys_fbo(ctx->DrawBuffer),
                           width, height, GL_COPY)) {
      DBG("%s: blit failed\n", __FUNCTION__);
      intel_miptree_release(&pbo_mt);
      return false;
   }

   intel_miptree_release(&pbo_mt);

   if (ctx->Query.CurrentOcclusionObject)
      ctx->Query.CurrentOcclusionObject->Result += width * height;

   intel_check_front_buffer_rendering(brw);

   DBG("%s: success\n", __FUNCTION__);
   return true;
}
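/* The client-side path that reaches the blit above: glDrawPixels sourcing
 * from a pixel unpack PBO, so the `pixels` argument is a byte offset into
 * the buffer rather than a pointer.  A sketch; the format/type pair is one
 * example that can match an ARGB8888 renderbuffer.
 */
static void
draw_pixels_from_pbo(GLuint pbo, GLsizei w, GLsizei h)
{
   glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
   glDrawPixels(w, h, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
                (const void *) 0);
}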
static GLboolean
do_blit_readpixels(GLcontext *ctx,
                   GLint x, GLint y, GLsizei width, GLsizei height,
                   GLenum format, GLenum type,
                   const struct gl_pixelstore_attrib *pack, GLvoid *pixels)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_region *src = intel_readbuf_region(intel);
   struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
   GLuint dst_offset;
   GLuint rowLength;
   drm_intel_bo *dst_buffer;
   GLboolean all;
   GLint dst_x, dst_y;

   if (INTEL_DEBUG & DEBUG_PIXEL)
      printf("%s\n", __FUNCTION__);

   if (!src)
      return GL_FALSE;

   if (!_mesa_is_bufferobj(pack->BufferObj)) {
      /* PBO only for now:
       */
      if (INTEL_DEBUG & DEBUG_PIXEL)
         printf("%s - not PBO\n", __FUNCTION__);
      return GL_FALSE;
   }

   if (ctx->_ImageTransferState ||
       !intel_check_blit_format(src, format, type)) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         printf("%s - bad format for blit\n", __FUNCTION__);
      return GL_FALSE;
   }

   if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         printf("%s: bad packing params\n", __FUNCTION__);
      return GL_FALSE;
   }

   if (pack->RowLength > 0)
      rowLength = pack->RowLength;
   else
      rowLength = width;

   if (pack->Invert) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__);
      return GL_FALSE;
   }
   else {
      if (ctx->ReadBuffer->Name == 0)
         rowLength = -rowLength;
   }

   dst_offset = (GLintptr) _mesa_image_address(2, pack, pixels, width, height,
                                               format, type, 0, 0, 0);

   if (!_mesa_clip_copytexsubimage(ctx,
                                   &dst_x, &dst_y,
                                   &x, &y,
                                   &width, &height)) {
      return GL_TRUE;
   }

   intel_prepare_render(intel);

   all = (width * height * src->cpp == dst->Base.Size &&
          x == 0 && dst_offset == 0);

   dst_x = 0;
   dst_y = 0;

   dst_buffer = intel_bufferobj_buffer(intel, dst,
                                       all ? INTEL_WRITE_FULL :
                                             INTEL_WRITE_PART);

   if (ctx->ReadBuffer->Name == 0)
      y = ctx->ReadBuffer->Height - (y + height);

   if (!intelEmitCopyBlit(intel,
                          src->cpp,
                          src->pitch, src->buffer, 0, src->tiling,
                          rowLength, dst_buffer, dst_offset, GL_FALSE,
                          x, y,
                          dst_x, dst_y,
                          width, height,
                          GL_COPY)) {
      return GL_FALSE;
   }

   if (INTEL_DEBUG & DEBUG_PIXEL)
      printf("%s - DONE\n", __FUNCTION__);

   return GL_TRUE;
}
int
brw_prepare_vertices(struct brw_context *brw,
                     GLuint min_index, GLuint max_index)
{
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   GLuint tmp = brw->vs.prog_data->inputs_read;
   GLuint i;
   const unsigned char *ptr = NULL;
   GLuint interleave = 0;
   int ret = 0;

   struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
   GLuint nr_enabled = 0;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read
    */
   if (0)
      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   while (tmp) {
      GLuint i = _mesa_ffsll(tmp) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      tmp &= ~(1 << i);
      enabled[nr_enabled++] = input;
   }

   /* XXX: In the rare cases where this happens we fallback all
    * the way to software rasterization, although a tnl fallback
    * would be sufficient.  I don't know of *any* real world
    * cases with > 17 vertex attributes enabled, so it probably
    * isn't an issue at this point.
    */
   if (nr_enabled >= BRW_VEP_MAX)
      return -1;

   for (i = 0; i < nr_enabled; i++) {
      struct brw_vertex_element *input = enabled[i];

      input->element_size = get_size(input->glarray->Type) *
                            input->glarray->Size;
      input->count = input->glarray->StrideB ?
                     max_index + 1 - min_index : 1;

      if (input->glarray->BufferObj->Name != 0) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(input->glarray->BufferObj);

         /* Named buffer object: Just reference its contents directly. */
         input->bo = intel_bufferobj_buffer(intel, intel_buffer, INTEL_READ);
         dri_bo_reference(input->bo);
         input->offset = (unsigned long) input->glarray->Ptr;
         input->stride = input->glarray->StrideB;

         ret |= dri_bufmgr_check_aperture_space(input->bo);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (i == 0) {
            /* Position array not properly enabled:
             */
            if (input->glarray->StrideB == 0)
               return -1;

            interleave = input->glarray->StrideB;
            ptr = input->glarray->Ptr;
         }
         else if (interleave != input->glarray->StrideB ||
                  (const unsigned char *) input->glarray->Ptr - ptr < 0 ||
                  (const unsigned char *) input->glarray->Ptr - ptr >
                  interleave) {
            interleave = 0;
         }

         upload[nr_uploads++] = input;

         /* We rebase drawing to start at element zero only when
          * varyings are not in vbos, which means we can end up
          * uploading non-varying arrays (stride != 0) when min_index
          * is zero.  This doesn't matter as the amount to upload is
          * the same for these arrays whether the draw call is rebased
          * or not - we just have to upload the one element.
          */
         assert(min_index == 0 || input->glarray->StrideB == 0);
      }
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1 && interleave && interleave <= 256) {
      /* All uploads are interleaved, so upload the arrays together as
       * interleaved.  First, upload the contents and set up upload[0].
       */
      copy_array_to_vbo_array(brw, upload[0], interleave);

      ret |= dri_bufmgr_check_aperture_space(upload[0]->bo);
      for (i = 1; i < nr_uploads; i++) {
         /* Then, just point upload[i] at upload[0]'s buffer.
          */
         upload[i]->stride = interleave;
         upload[i]->offset = upload[0]->offset +
            ((const unsigned char *) upload[i]->glarray->Ptr - ptr);
         upload[i]->bo = upload[0]->bo;
         dri_bo_reference(upload[i]->bo);
      }
   }
   else {
      /* Upload non-interleaved arrays */
      for (i = 0; i < nr_uploads; i++) {
         copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
         if (upload[i]->bo) {
            ret |= dri_bufmgr_check_aperture_space(upload[i]->bo);
         }
      }
   }

   if (ret)
      return 1;

   return 0;
}
static struct buffer *
array_buffer(const struct gl_client_array *array)
{
   return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj));
}
void
brw_upload_indices(struct brw_context *brw,
                   const struct _mesa_index_buffer *index_buffer)
{
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
   GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
   struct gl_buffer_object *bufferobj = index_buffer->obj;
   GLuint offset = (GLuint) index_buffer->ptr;

   /* Turn into a proper VBO:
    */
   if (!bufferobj->Name) {
      /* Get new bufferobj, offset:
       */
      get_space(brw, ib_size, &bufferobj, &offset);

      /* Straight upload
       */
      ctx->Driver.BufferSubData(ctx,
                                GL_ELEMENT_ARRAY_BUFFER_ARB,
                                offset,
                                ib_size,
                                index_buffer->ptr,
                                bufferobj);
   } else {
      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
      if ((get_size(index_buffer->type) - 1) & offset) {
         struct gl_buffer_object *vbo;
         GLuint voffset;
         GLubyte *map = ctx->Driver.MapBuffer(ctx,
                                              GL_ELEMENT_ARRAY_BUFFER_ARB,
                                              GL_DYNAMIC_DRAW_ARB,
                                              bufferobj);
         map += offset;
         get_space(brw, ib_size, &vbo, &voffset);

         ctx->Driver.BufferSubData(ctx,
                                   GL_ELEMENT_ARRAY_BUFFER_ARB,
                                   voffset,
                                   ib_size,
                                   map,
                                   vbo);
         ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);

         bufferobj = vbo;
         offset = voffset;
      }
   }

   /* Emit the indexbuffer packet:
    */
   {
      struct brw_indexbuffer ib;
      struct buffer *buffer =
         intel_bufferobj_buffer(intel_buffer_object(bufferobj));

      memset(&ib, 0, sizeof(ib));

      ib.header.bits.opcode = CMD_INDEX_BUFFER;
      ib.header.bits.length = sizeof(ib) / 4 - 2;
      ib.header.bits.index_format = get_index_type(index_buffer->type);
      ib.header.bits.cut_index_enable = 0;

      BEGIN_BATCH(4, 0);
      OUT_BATCH(ib.header.dword);
      OUT_BATCH(bmBufferOffset(intel, buffer) + offset);
      OUT_BATCH(bmBufferOffset(intel, buffer) + offset + ib_size);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
static bool
do_blit_readpixels(struct gl_context *ctx,
                   GLint x, GLint y, GLsizei width, GLsizei height,
                   GLenum format, GLenum type,
                   const struct gl_pixelstore_attrib *pack, GLvoid *pixels)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
   GLuint dst_offset;
   drm_intel_bo *dst_buffer;
   GLint dst_x, dst_y;
   GLuint dirty;

   DBG("%s\n", __FUNCTION__);

   assert(_mesa_is_bufferobj(pack->BufferObj));

   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);

   if (ctx->_ImageTransferState ||
       !_mesa_format_matches_format_and_type(irb->mt->format, format, type,
                                             false)) {
      DBG("%s - bad format for blit\n", __FUNCTION__);
      return false;
   }

   if (pack->SwapBytes || pack->LsbFirst) {
      DBG("%s: bad packing params\n", __FUNCTION__);
      return false;
   }

   int dst_stride = _mesa_image_row_stride(pack, width, format, type);
   bool dst_flip = false;
   /* Mesa flips the dst_stride for pack->Invert, but we want our mt to have
    * a normal dst_stride.
    */
   struct gl_pixelstore_attrib uninverted_pack = *pack;
   if (pack->Invert) {
      dst_stride = -dst_stride;
      dst_flip = true;
      uninverted_pack.Invert = false;
   }

   dst_offset = (GLintptr) pixels;
   dst_offset += _mesa_image_offset(2, &uninverted_pack, width, height,
                                    format, type, 0, 0, 0);

   if (!_mesa_clip_copytexsubimage(ctx,
                                   &dst_x, &dst_y,
                                   &x, &y,
                                   &width, &height)) {
      return true;
   }

   dirty = brw->front_buffer_dirty;
   intel_prepare_render(brw);
   brw->front_buffer_dirty = dirty;

   dst_buffer = intel_bufferobj_buffer(brw, dst, dst_offset,
                                       height * dst_stride);

   struct intel_mipmap_tree *pbo_mt =
      intel_miptree_create_for_bo(brw,
                                  dst_buffer,
                                  irb->mt->format,
                                  dst_offset,
                                  width, height,
                                  dst_stride,
                                  I915_TILING_NONE);
   if (!pbo_mt)
      return false;

   if (!intel_miptree_blit(brw,
                           irb->mt, irb->mt_level, irb->mt_layer,
                           x, y, _mesa_is_winsys_fbo(ctx->ReadBuffer),
                           pbo_mt, 0, 0,
                           0, 0, dst_flip,
                           width, height, GL_COPY)) {
      intel_miptree_release(&pbo_mt);
      return false;
   }

   intel_miptree_release(&pbo_mt);

   DBG("%s - DONE\n", __FUNCTION__);

   return true;
}
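/* And the read direction: glReadPixels into a pixel pack PBO.  Again a
 * client-side sketch; the offset passed as `pixels` becomes dst_offset in
 * the function above.
 */
static void
read_pixels_to_pbo(GLuint pbo, GLsizei w, GLsizei h)
{
   glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
   glReadPixels(0, 0, w, h, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
                (void *) 0);
}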
static void
brw_emit_prim(struct brw_context *brw,
              const struct _mesa_prim *prim,
              uint32_t hw_prim)
{
   int verts_per_instance;
   int vertex_access_type;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
       prim->start, prim->count);

   int start_vertex_location = prim->start;
   int base_vertex_location = prim->basevertex;

   if (prim->indexed) {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (brw->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0 && !prim->is_indirect)
      return;

   /* If we're set to always flush, do it before and after the primitive
    * emit.  We want to catch both missed flushes that hurt
    * instruction/state cache and missed flushes of the render cache as it
    * heads to other parts of the pipeline besides the draw code.
    */
   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);

   /* If indirect, emit a bunch of loads from the indirect BO. */
   if (prim->is_indirect) {
      struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_buffer_object(indirect_buffer),
                                prim->indirect_offset, 5 * sizeof(GLuint));

      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 0);
      brw_load_register_mem(brw, GEN7_3DPRIM_INSTANCE_COUNT, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 4);

      brw_load_register_mem(brw, GEN7_3DPRIM_START_VERTEX, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 8);
      if (prim->indexed) {
         brw_load_register_mem(brw, GEN7_3DPRIM_BASE_VERTEX, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 12);
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 16);
      } else {
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 12);
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   } else {
      indirect_flag = 0;
   }

   BEGIN_BATCH(brw->gen >= 7 ? 7 : 6);

   if (brw->gen >= 7) {
      const int predicate_enable =
         (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) ?
         GEN7_3DPRIM_PREDICATE_ENABLE : 0;

      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag |
                predicate_enable);
      OUT_BATCH(hw_prim | vertex_access_type);
   } else {
      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);
}
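/* Layout of the indirect parameter buffer read by the register loads above,
 * as defined by GL_ARB_draw_indirect.  The MI_LOAD_REGISTER_MEM offsets
 * (+0, +4, +8, +12, +16) map directly onto these fields.
 */
typedef struct {
   GLuint count;         /* GEN7_3DPRIM_VERTEX_COUNT   (+0)  */
   GLuint instanceCount; /* GEN7_3DPRIM_INSTANCE_COUNT (+4)  */
   GLuint firstIndex;    /* GEN7_3DPRIM_START_VERTEX   (+8)  */
   GLint  baseVertex;    /* GEN7_3DPRIM_BASE_VERTEX    (+12) */
   GLuint baseInstance;  /* GEN7_3DPRIM_START_INSTANCE (+16) */
} DrawElementsIndirectCommand;

/* The non-indexed variant has no baseVertex field, which is why the code
 * above loads START_INSTANCE from +12 and zeroes BASE_VERTEX with
 * MI_LOAD_REGISTER_IMM instead.
 */
typedef struct {
   GLuint count;         /* GEN7_3DPRIM_VERTEX_COUNT   (+0)  */
   GLuint instanceCount; /* GEN7_3DPRIM_INSTANCE_COUNT (+4)  */
   GLuint first;         /* GEN7_3DPRIM_START_VERTEX   (+8)  */
   GLuint baseInstance;  /* GEN7_3DPRIM_START_INSTANCE (+12) */
} DrawArraysIndirectCommand;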
/* Pros:
 *   - no waiting for idle before updating framebuffer.
 *
 * Cons:
 *   - if upload is by memcpy, this may actually be slower than the
 *     fallback path.
 *   - uploads the whole image even if the destination is clipped.
 *
 * Need to benchmark.
 *
 * Given the questions about performance, implement for pbo's only.
 * This path is definitely a win if the pbo is already in agp.  If it
 * turns out otherwise, we can add the code necessary to upload client
 * data to agp space before performing the blit.  (Though it may turn
 * out to be better/simpler just to use the texture engine).
 */
static GLboolean
do_blit_drawpixels(GLcontext *ctx,
                   GLint x, GLint y, GLsizei width, GLsizei height,
                   GLenum format, GLenum type,
                   const struct gl_pixelstore_attrib *unpack,
                   const GLvoid *pixels)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_region *dest = intel_drawbuf_region(intel);
   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
   GLuint src_offset;
   GLuint rowLength;
   struct _DriFenceObject *fence = NULL;

   if (INTEL_DEBUG & DEBUG_PIXEL)
      _mesa_printf("%s\n", __FUNCTION__);

   if (!dest) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         _mesa_printf("%s - no dest\n", __FUNCTION__);
      return GL_FALSE;
   }

   if (src) {
      /* This validation should be done by core mesa:
       */
      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
                                     format, type, pixels)) {
         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
         return GL_TRUE;
      }
   } else {
      /* PBO only for now:
       */
      if (INTEL_DEBUG & DEBUG_PIXEL)
         _mesa_printf("%s - not PBO\n", __FUNCTION__);
      return GL_FALSE;
   }

   if (!intel_check_blit_format(dest, format, type)) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         _mesa_printf("%s - bad format for blit\n", __FUNCTION__);
      return GL_FALSE;
   }

   if (!intel_check_blit_fragment_ops(ctx)) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         _mesa_printf("%s - bad GL fragment state for blitter\n",
                      __FUNCTION__);
      return GL_FALSE;
   }

   if (ctx->Pixel.ZoomX != 1.0F) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         _mesa_printf("%s - bad PixelZoomX for blit\n", __FUNCTION__);
      return GL_FALSE;
   }

   if (unpack->RowLength > 0)
      rowLength = unpack->RowLength;
   else
      rowLength = width;

   if (ctx->Pixel.ZoomY == -1.0F) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
      return GL_FALSE;          /* later */
      y -= height;
   } else if (ctx->Pixel.ZoomY == 1.0F) {
      rowLength = -rowLength;
   } else {
      if (INTEL_DEBUG & DEBUG_PIXEL)
         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
      return GL_FALSE;
   }

   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
                                             format, type, 0, 0, 0);

   intelFlush(&intel->ctx);
   LOCK_HARDWARE(intel);

   if (intel->driDrawable->numClipRects) {
      __DRIdrawablePrivate *dPriv = intel->driDrawable;
      int nbox = dPriv->numClipRects;
      drm_clip_rect_t *box = dPriv->pClipRects;
      drm_clip_rect_t rect;
      drm_clip_rect_t dest_rect;
      struct _DriBufferObject *src_buffer =
         intel_bufferobj_buffer(intel, src, INTEL_READ);
      int i;

      dest_rect.x1 = dPriv->x + x;
      dest_rect.y1 = dPriv->y + dPriv->h - (y + height);
      dest_rect.x2 = dest_rect.x1 + width;
      dest_rect.y2 = dest_rect.y1 + height;

      for (i = 0; i < nbox; i++) {
         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
            continue;

         intelEmitCopyBlit(intel,
                           dest->cpp,
                           rowLength, src_buffer, src_offset,
                           dest->pitch, dest->buffer, 0,
                           rect.x1 - dest_rect.x1,
                           rect.y2 - dest_rect.y2,
                           rect.x1,
                           rect.y1,
                           rect.x2 - rect.x1, rect.y2 - rect.y1,
                           ctx->Color.ColorLogicOpEnabled ?
                           ctx->Color.LogicOp : GL_COPY);
      }

      fence = intel_batchbuffer_flush(intel->batch);
      driFenceReference(fence);
   }

   UNLOCK_HARDWARE(intel);

   if (fence) {
      driFenceFinish(fence, DRM_FENCE_TYPE_EXE | DRM_I915_FENCE_TYPE_RW,
                     GL_FALSE);
      driFenceUnReference(fence);
   }

   if (INTEL_DEBUG & DEBUG_PIXEL)
      _mesa_printf("%s - DONE\n", __FUNCTION__);

   return GL_TRUE;
}
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the
    * data is passed sideband through the fixed function units.  So, we need
    * to prepare the vertex buffer for it, but it's not present in
    * inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint index = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[index];

      vs_inputs &= ~BITFIELD64_BIT(index);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);

         const uint32_t offset = (uintptr_t)glarray->Ptr;

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
         if (glarray->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glarray->StrideB * brw->baseinstance;
               range = (glarray->StrideB * ((brw->num_instances - 1) /
                                            glarray->InstanceDivisor) +
                        glarray->_ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glarray->StrideB;
               range = (glarray->StrideB * (max_index - min_index) +
                        glarray->_ElementSize);
            }
         }

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         unsigned k;
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = offset;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;
            buffer->size = glarray->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

            input->buffer = j++;
            input->offset = 0;
         }
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize >
                  interleaved) {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single
             * vertex and, for example, the data is stored on the
             * application's stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start,
                                          range);
      drm_intel_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}