/**
 * Back the GL buffer object with freshly allocated GPU memory.
 *
 * Allocates a dri_bo sized to the buffer object's current Base.Size with
 * 64-byte alignment and stores it in intel_obj->buffer.  Any previously
 * held BO is assumed to have been released by the caller.
 */
static void
intel_bufferobj_alloc_buffer(struct intel_context *intel,
                             struct intel_buffer_object *intel_obj)
{
   intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
                                    intel_obj->Base.Size, 64);
}
/**
 * Finish the current vertex buffer and start a fresh one.
 *
 * Flushes any pending vertex data via intel_end_vertex(), then allocates a
 * new vertex BO sized to hold the whole vertex_ptr staging array, with 4K
 * alignment.
 */
void intel_next_vertex(intel_screen_private *intel)
{
	intel_end_vertex(intel);

	intel->vertex_bo =
		dri_bo_alloc(intel->bufmgr, "vertex",
			     sizeof(intel->vertex_ptr), 4096);
}
struct intel_batchbuffer * intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size) { struct intel_batchbuffer *batch = calloc(1, sizeof(*batch)); assert(flag == I915_EXEC_RENDER || flag == I915_EXEC_BSD || flag == I915_EXEC_BLT || flag == I915_EXEC_VEBOX); if (!buffer_size || buffer_size < BATCH_SIZE) { buffer_size = BATCH_SIZE; } /* the buffer size can't exceed 4M */ if (buffer_size > MAX_BATCH_SIZE) { buffer_size = MAX_BATCH_SIZE; } batch->intel = intel; batch->flag = flag; batch->run = drm_intel_bo_mrb_exec; if (IS_GEN6(intel->device_info) && flag == I915_EXEC_RENDER) batch->wa_render_bo = dri_bo_alloc(intel->bufmgr, "wa scratch", 4096, 4096); else batch->wa_render_bo = NULL; intel_batchbuffer_reset(batch, buffer_size); return batch; }
/** Allocate a batch buffer BO: one page on i865, four pages elsewhere. */
static dri_bo *bo_alloc(ScrnInfoPtr scrn)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	/* The 865 has issues with larger-than-page-sized batch buffers. */
	int size = IS_I865G(intel) ? 4096 : 4 * 4096;

	return dri_bo_alloc(intel->bufmgr, "batch", size, 4096);
}
/* Break the COW tie to the pbo and allocate a new buffer. * The pbo gets to keep the data. */ void intel_region_release_pbo(struct intel_context *intel, struct intel_region *region) { assert(region->buffer == region->pbo->buffer); region->pbo->region = NULL; region->pbo = NULL; dri_bo_unreference(region->buffer); region->buffer = NULL; region->buffer = dri_bo_alloc(intel->bufmgr, "region", region->pitch * region->cpp * region->height, 64); }
/**
 * Replace the vertex upload VBO with a fresh buffer of at least
 * BRW_UPLOAD_INIT_SIZE bytes and rewind the upload offset to 0.
 * The previous upload BO (if any) is unreferenced first.
 */
static void wrap_buffers( struct brw_context *brw, GLuint size )
{
   if (size < BRW_UPLOAD_INIT_SIZE)
      size = BRW_UPLOAD_INIT_SIZE;

   /* Start filling from the beginning of the new buffer. */
   brw->vb.upload.offset = 0;

   if (brw->vb.upload.bo != NULL)
      dri_bo_unreference(brw->vb.upload.bo);
   brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
                                    size, 1);

   /* Set the internal VBO to no-backing-store.  We only use them as a
    * temporary within a brw_try_draw_prims while the lock is held.
    */
   /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH
      FAKE TO PUSH THIS STUFF */
/*    if (!brw->intel.ttm)
      dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); */
}
static void intel_batchbuffer_reset(struct intel_batchbuffer *batch, int buffer_size) { struct intel_driver_data *intel = batch->intel; int batch_size = buffer_size; assert(batch->flag == I915_EXEC_RENDER || batch->flag == I915_EXEC_BLT || batch->flag == I915_EXEC_BSD || batch->flag == I915_EXEC_VEBOX); dri_bo_unreference(batch->buffer); batch->buffer = dri_bo_alloc(intel->bufmgr, "batch buffer", batch_size, 0x1000); assert(batch->buffer); dri_bo_map(batch->buffer, 1); assert(batch->buffer->virtual); batch->map = batch->buffer->virtual; batch->size = batch_size; batch->ptr = batch->map; batch->atomic = 0; }
/**
 * Called via glMapBufferRange().
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(GLcontext * ctx, GLenum target,
                          GLintptr offset, GLsizeiptr length,
                          GLbitfield access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

   /* Buffers backed by plain malloc'd memory never block: map directly. */
   if (intel_obj->sys_buffer) {
      obj->Pointer = intel_obj->sys_buffer + offset;
      return obj->Pointer;
   }

   /* Break any copy-on-write tie to a region before handing out a map. */
   if (intel_obj->region)
      intel_bufferobj_cow(intel, intel_obj);

   /* If the mapping is synchronized with other GL operations, flush
    * the batchbuffer so that GEM knows about the buffer access for later
    * syncing.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
      intelFlush(ctx);

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the user doesn't care about existing buffer contents and mapping
    * would cause us to block, then throw out the old buffer.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      drm_intel_bo_unreference(intel_obj->buffer);
      intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
                                       intel_obj->Base.Size, 64);
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
         /* Explicit-flush path: stage writes in plain system memory;
          * subranges get uploaded at FlushMappedBufferRange time.
          */
         intel_obj->range_map_buffer = _mesa_malloc(length);
         obj->Pointer = intel_obj->range_map_buffer;
      } else {
         /* Otherwise stage writes in a temporary BO, blitted back on unmap. */
         intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "range map",
                                                      length, 64);
         /* Write-only maps can go through the GTT when the kernel supports
          * execution fencing; otherwise fall back to a CPU map.
          */
         if (!(access & GL_MAP_READ_BIT) &&
             intel->intelScreen->kernel_exec_fencing) {
            drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
            intel_obj->mapped_gtt = GL_TRUE;
         } else {
            drm_intel_bo_map(intel_obj->range_map_bo,
                             (access & GL_MAP_WRITE_BIT) != 0);
            intel_obj->mapped_gtt = GL_FALSE;
         }
         obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

   /* Common case: map the real BO, preferring the GTT for write-only access
    * (see note above).
    */
   if (!(access & GL_MAP_READ_BIT) &&
       intel->intelScreen->kernel_exec_fencing) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_obj->mapped_gtt = GL_TRUE;
   } else {
      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
      intel_obj->mapped_gtt = GL_FALSE;
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}
/* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } return; } buf = (GLfloat *) calloc(1, bufsz); /* fragment shader constants */ if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = *brw->wm.prog_data->param[i]; } /* The clipplanes are actually delivered to both CLIP and VS units. * VS uses them to calculate the outcode bitmasks. 
*/ if (brw->curbe.clip_size) { GLuint offset = brw->curbe.clip_start * 16; GLuint j; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0] = fixed_plane[i][0]; buf[offset + i * 4 + 1] = fixed_plane[i][1]; buf[offset + i * 4 + 2] = fixed_plane[i][2]; buf[offset + i * 4 + 3] = fixed_plane[i][3]; } /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to * clip-space: */ assert(MAX_CLIP_PLANES == 6); for (j = 0; j < MAX_CLIP_PLANES; j++) { if (ctx->Transform.ClipPlanesEnabled & (1<<j)) { buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0]; buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1]; buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2]; buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3]; i++; } } } /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; if (brw->vertex_program->IsNVProgram) _mesa_load_tracked_matrices(ctx); /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); if (vp->use_const_buffer) { /* Load the subset of push constants that will get used when * we also have a pull constant buffer. */ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { if (brw->vs.constant_map[i] != -1) { assert(brw->vs.constant_map[i] <= nr); memcpy(buf + offset + brw->vs.constant_map[i] * 4, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } } } else { for (i = 0; i < nr; i++) { memcpy(buf + offset + i * 4, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } } } if (0) { for (i = 0; i < sz*16; i+=4) printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.curbe_bo != NULL && brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ free(buf); } else { /* constants have changed */ if (brw->curbe.last_buf) free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (brw->curbe.curbe_bo != NULL && brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) { drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); dri_bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } if (brw->curbe.curbe_bo == NULL) { /* Allocate a single page for CURBE entries for this batchbuffer. * They're generally around 64b. */ brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", 4096, 1 << 6); brw->curbe.curbe_next_offset = 0; drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo); } brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; brw->curbe.curbe_next_offset += bufsz; brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); /* Copy data to the buffer: */ memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset, buf, bufsz); } brw_add_validated_bo(brw, brw->curbe.curbe_bo); /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ }
/**
 * Allocate the backing BO for a VA surface and fill in its layout fields
 * (plane offsets, chroma pitch/size, overall region size) based on the
 * fourcc, subsampling, and whether the surface is tiled.
 *
 * If the surface already has a BO, only re-asserts that the requested
 * format matches and returns.  Tiled surfaces always use Y-tiling.
 */
VOID
media_alloc_surface_bo (VADriverContextP ctx,
			struct object_surface * obj_surface,
			INT tiled, UINT fourcc, UINT subsampling)
{
  INT region_width, region_height;
  MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) ctx->pDriverData;
  MEDIA_DRV_ASSERT (ctx);
  MEDIA_DRV_ASSERT (drv_ctx);
  /* Already allocated: the format must not change. */
  if (obj_surface->bo)
    {
      MEDIA_DRV_ASSERT (obj_surface->fourcc);
      MEDIA_DRV_ASSERT (obj_surface->fourcc == fourcc);
      MEDIA_DRV_ASSERT (obj_surface->subsampling == subsampling);
      return;
    }
  obj_surface->x_cb_offset = 0;	/* X offset is always 0 */
  obj_surface->x_cr_offset = 0;
  if (tiled)
    {
      /* Planar 8-bit 4:2:0 formats with swapped/split planes are not
       * supported tiled.
       */
      MEDIA_DRV_ASSERT (fourcc != VA_FOURCC ('I', '4', '2', '0') &&
			fourcc != VA_FOURCC ('I', 'Y', 'U', 'V') &&
			fourcc != VA_FOURCC ('Y', 'V', '1', '2'));
      /* Tiled pitch is 128-byte aligned, height 32-row aligned. */
      obj_surface->width = ALIGN (obj_surface->orig_width, 128);
      obj_surface->height = ALIGN (obj_surface->orig_height, 32);
      region_height = obj_surface->height;
      switch (fourcc)
	{
	case VA_FOURCC ('N', 'V', '1', '2'):
	  /* Interleaved CbCr plane directly after Y. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV420);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->cb_cr_width = obj_surface->orig_width / 2;
	  obj_surface->cb_cr_height = obj_surface->orig_height / 2;
	  obj_surface->y_cb_offset = obj_surface->height;
	  obj_surface->y_cr_offset = obj_surface->height;
	  region_width = obj_surface->width;
	  region_height = obj_surface->height +
	    ALIGN (obj_surface->cb_cr_height, 32);
	  break;
	case VA_FOURCC ('I', 'M', 'C', '1'):
	  /* Planar 4:2:0, Cr plane before Cb. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV420);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->cb_cr_width = obj_surface->orig_width / 2;
	  obj_surface->cb_cr_height = obj_surface->orig_height / 2;
	  obj_surface->y_cr_offset = obj_surface->height;
	  obj_surface->y_cb_offset = obj_surface->y_cr_offset +
	    ALIGN (obj_surface->cb_cr_height, 32);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height +
	    ALIGN (obj_surface->cb_cr_height, 32) * 2;
	  break;
	case VA_FOURCC ('I', 'M', 'C', '3'):
	  /* Planar 4:2:0, Cb plane before Cr. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV420);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->cb_cr_width = obj_surface->orig_width / 2;
	  obj_surface->cb_cr_height = obj_surface->orig_height / 2;
	  obj_surface->y_cb_offset = obj_surface->height;
	  obj_surface->y_cr_offset = obj_surface->y_cb_offset +
	    ALIGN (obj_surface->cb_cr_height, 32);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height +
	    ALIGN (obj_surface->cb_cr_height, 32) * 2;
	  break;
	case VA_FOURCC ('4', '2', '2', 'H'):
	  /* Planar 4:2:2, horizontally subsampled chroma. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV422H);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->cb_cr_width = obj_surface->orig_width / 2;
	  obj_surface->cb_cr_height = obj_surface->orig_height;
	  obj_surface->y_cb_offset = obj_surface->height;
	  obj_surface->y_cr_offset = obj_surface->y_cb_offset +
	    ALIGN (obj_surface->cb_cr_height, 32);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height +
	    ALIGN (obj_surface->cb_cr_height, 32) * 2;
	  break;
	case VA_FOURCC ('4', '2', '2', 'V'):
	  /* Planar 4:2:2, vertically subsampled chroma. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV422V);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->cb_cr_width = obj_surface->orig_width;
	  obj_surface->cb_cr_height = obj_surface->orig_height / 2;
	  obj_surface->y_cb_offset = obj_surface->height;
	  obj_surface->y_cr_offset = obj_surface->y_cb_offset +
	    ALIGN (obj_surface->cb_cr_height, 32);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height +
	    ALIGN (obj_surface->cb_cr_height, 32) * 2;
	  break;
	case VA_FOURCC ('4', '1', '1', 'P'):
	  /* Planar 4:1:1, quarter-width chroma. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV411);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->cb_cr_width = obj_surface->orig_width / 4;
	  obj_surface->cb_cr_height = obj_surface->orig_height;
	  obj_surface->y_cb_offset = obj_surface->height;
	  obj_surface->y_cr_offset = obj_surface->y_cb_offset +
	    ALIGN (obj_surface->cb_cr_height, 32);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height +
	    ALIGN (obj_surface->cb_cr_height, 32) * 2;
	  break;
	case VA_FOURCC ('4', '4', '4', 'P'):
	  /* Planar 4:4:4, full-resolution chroma. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV444);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->cb_cr_width = obj_surface->orig_width;
	  obj_surface->cb_cr_height = obj_surface->orig_height;
	  obj_surface->y_cb_offset = obj_surface->height;
	  obj_surface->y_cr_offset = obj_surface->y_cb_offset +
	    ALIGN (obj_surface->cb_cr_height, 32);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height +
	    ALIGN (obj_surface->cb_cr_height, 32) * 2;
	  break;
	case VA_FOURCC ('Y', '8', '0', '0'):
	  /* Luma only; cb_cr_height is 0, so the chroma terms vanish. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV400);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->cb_cr_width = 0;
	  obj_surface->cb_cr_height = 0;
	  obj_surface->y_cb_offset = obj_surface->height;
	  obj_surface->y_cr_offset = obj_surface->y_cb_offset +
	    ALIGN (obj_surface->cb_cr_height, 32);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height +
	    ALIGN (obj_surface->cb_cr_height, 32) * 2;
	  break;
	case VA_FOURCC ('Y', 'U', 'Y', '2'):
	case VA_FOURCC ('U', 'Y', 'V', 'Y'):
	  /* Packed 4:2:2: 2 bytes per pixel, single interleaved plane.
	   * NOTE(review): cb_cr_height is orig_height / 2 here, unlike the
	   * non-tiled packed case below — confirm against users.
	   */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV422H);
	  obj_surface->width = ALIGN (obj_surface->orig_width * 2, 128);
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  obj_surface->y_cb_offset = 0;
	  obj_surface->y_cr_offset = 0;
	  obj_surface->cb_cr_width = obj_surface->orig_width / 2;
	  obj_surface->cb_cr_height = obj_surface->orig_height / 2;
	  region_width = obj_surface->width;
	  region_height = obj_surface->height;
	  break;
	case VA_FOURCC ('R', 'G', 'B', 'A'):
	case VA_FOURCC ('R', 'G', 'B', 'X'):
	case VA_FOURCC ('B', 'G', 'R', 'A'):
	case VA_FOURCC ('B', 'G', 'R', 'X'):
	  /* Packed 32-bit RGB: 4 bytes per pixel, no chroma fields set. */
	  MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_RGBX);
	  obj_surface->width = ALIGN (obj_surface->orig_width * 4, 128);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height;
	  break;
	default:
	  /* Never get here */
	  MEDIA_DRV_ASSERT (0);
	  break;
	}
    }
  else
    {
      /* Linear surfaces: only these subsamplings are supported. */
      MEDIA_DRV_ASSERT (subsampling == SUBSAMPLE_YUV420 ||
			subsampling == SUBSAMPLE_YUV422H ||
			subsampling == SUBSAMPLE_YUV422V ||
			subsampling == SUBSAMPLE_RGBX ||
			subsampling == SUBSAMPLE_P208);
      region_width = obj_surface->width;
      region_height = obj_surface->height;
      switch (fourcc)
	{
	case VA_FOURCC ('N', 'V', '1', '2'):
	  /* Y plane followed by interleaved half-height CbCr plane. */
	  obj_surface->y_cb_offset = obj_surface->height;
	  obj_surface->y_cr_offset = obj_surface->height;
	  obj_surface->cb_cr_width = obj_surface->orig_width / 2;
	  obj_surface->cb_cr_height = obj_surface->orig_height / 2;
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  region_height = obj_surface->height + obj_surface->height / 2;
	  break;
	case VA_FOURCC ('Y', 'V', '1', '2'):
	case VA_FOURCC ('I', '4', '2', '0'):
	  /* Three planes; YV12 stores Cr first, I420 stores Cb first. */
	  if (fourcc == VA_FOURCC ('Y', 'V', '1', '2'))
	    {
	      obj_surface->y_cr_offset = obj_surface->height;
	      obj_surface->y_cb_offset =
		obj_surface->height + obj_surface->height / 4;
	    }
	  else
	    {
	      obj_surface->y_cb_offset = obj_surface->height;
	      obj_surface->y_cr_offset =
		obj_surface->height + obj_surface->height / 4;
	    }
	  obj_surface->cb_cr_width = obj_surface->orig_width / 2;
	  obj_surface->cb_cr_height = obj_surface->orig_height / 2;
	  obj_surface->cb_cr_pitch = obj_surface->width / 2;
	  region_height = obj_surface->height + obj_surface->height / 2;
	  break;
	case VA_FOURCC ('Y', 'U', 'Y', '2'):
	case VA_FOURCC ('U', 'Y', 'V', 'Y'):
	  /* Packed 4:2:2: 2 bytes per pixel, 16-byte aligned pitch. */
	  obj_surface->width = ALIGN (obj_surface->orig_width * 2, 16);
	  obj_surface->y_cb_offset = 0;
	  obj_surface->y_cr_offset = 0;
	  obj_surface->cb_cr_width = obj_surface->orig_width / 2;
	  obj_surface->cb_cr_height = obj_surface->orig_height;
	  obj_surface->cb_cr_pitch = obj_surface->width;
	  region_width = obj_surface->width;
	  region_height = obj_surface->height;
	  break;
	case VA_FOURCC ('R', 'G', 'B', 'A'):
	case VA_FOURCC ('R', 'G', 'B', 'X'):
	case VA_FOURCC ('B', 'G', 'R', 'A'):
	case VA_FOURCC ('B', 'G', 'R', 'X'):
	  /* Packed 32-bit RGB, 16-byte aligned pitch. */
	  obj_surface->width = ALIGN (obj_surface->orig_width * 4, 16);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height;
	  break;
	case VA_FOURCC ('P', '2', '0', '8'):
	  /* 8-bit single plane, 32-byte aligned pitch. */
	  obj_surface->width = ALIGN (obj_surface->orig_width, 32);
	  region_width = obj_surface->width;
	  region_height = obj_surface->height;
	  break;
	default:
	  /* Never get here */
	  MEDIA_DRV_ASSERT (0);
	  break;
	}
    }
  /* Total size rounded up to a whole page. */
  obj_surface->size = ALIGN (region_width * region_height, 0x1000);
  if (tiled)
    {
      UINT tiling_mode = I915_TILING_Y;	/* always uses Y-tiled format */
      ULONG pitch;
      obj_surface->bo = drm_intel_bo_alloc_tiled (drv_ctx->drv_data.bufmgr,
						  "vaapi surface",
						  region_width,
						  region_height, 1,
						  &tiling_mode, &pitch, 0);
      /* The kernel may downgrade the tiling; we require it honored. */
      MEDIA_DRV_ASSERT (tiling_mode == I915_TILING_Y);
      MEDIA_DRV_ASSERT (pitch == obj_surface->width);
    }
  else
    {
      obj_surface->bo = dri_bo_alloc (drv_ctx->drv_data.bufmgr,
				      "vaapi surface",
				      obj_surface->size, 0x1000);
    }
  obj_surface->fourcc = fourcc;
  obj_surface->subsampling = subsampling;
  MEDIA_DRV_ASSERT (obj_surface->bo);
}