/**
 * Interface for getting memory for uploading streamed data to the GPU
 *
 * In most cases, streamed data (for GPU state structures, for example) is
 * uploaded through brw_state_batch(), since that interface allows relocations
 * from the streamed space returned to other BOs.  However, that interface has
 * the restriction that the amount of space allocated has to be "small" (see
 * estimated_max_prim_size in brw_draw.c).
 *
 * This interface, on the other hand, is able to handle arbitrarily sized
 * allocation requests, though it will batch small allocations into the same
 * BO for efficiency and reduced memory footprint.
 *
 * \note The returned pointer is valid only until intel_upload_finish(), which
 * will happen at batch flush or the next
 * intel_upload_space()/intel_upload_data().
 *
 * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
 * entry, and will have a reference to the new BO containing the state on
 * return.
 *
 * \param out_offset Offset within the buffer object at which the data will
 * land.
 */
void *
intel_upload_space(struct brw_context *brw,
                   uint32_t size,
                   uint32_t alignment,
                   drm_intel_bo **out_bo,
                   uint32_t *out_offset)
{
   uint32_t offset;

   offset = ALIGN_NPOT(brw->upload.next_offset, alignment);
   if (brw->upload.bo && offset + size > brw->upload.bo->size) {
      intel_upload_finish(brw);
      offset = 0;
   }

   if (!brw->upload.bo) {
      brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "streamed data",
                                          MAX2(INTEL_UPLOAD_SIZE, size), 4096);
      if (brw->has_llc)
         drm_intel_bo_map(brw->upload.bo, true);
      else
         drm_intel_gem_bo_map_gtt(brw->upload.bo);
   }

   brw->upload.next_offset = offset + size;

   *out_offset = offset;
   if (*out_bo != brw->upload.bo) {
      drm_intel_bo_unreference(*out_bo);
      *out_bo = brw->upload.bo;
      drm_intel_bo_reference(brw->upload.bo);
   }

   return brw->upload.bo->virtual + offset;
}
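/* A minimal usage sketch (not from the source above): a hypothetical caller
 * reserves space for an index buffer, copies its data in, and keeps the
 * returned BO reference and offset for later use.  The names upload_indices,
 * ib_bo, and ib_offset are illustrative only. */
static void
upload_indices(struct brw_context *brw, const uint16_t *indices,
               unsigned count, drm_intel_bo **ib_bo, uint32_t *ib_offset)
{
   uint32_t size = count * sizeof(*indices);

   /* Returned pointer is CPU-writable only until the next upload or
    * batch flush. */
   void *map = intel_upload_space(brw, size, sizeof(*indices),
                                  ib_bo, ib_offset);
   memcpy(map, indices, size);
}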
static void store_dword_loop(int fd)
{
        int i;
        int num_rings = gem_get_num_rings(fd);

        srandom(0xdeadbeef);

        for (i = 0; i < SLOW_QUICK(0x100000, 10); i++) {
                int ring = random() % num_rings + 1;

                if (ring == I915_EXEC_RENDER) {
                        BEGIN_BATCH(4, 1);
                        OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
                        OUT_BATCH(0xffffffff); /* compare dword */
                        OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
                                  I915_GEM_DOMAIN_RENDER, 0);
                        OUT_BATCH(MI_NOOP);
                        ADVANCE_BATCH();
                } else {
                        BEGIN_BATCH(4, 1);
                        OUT_BATCH(MI_FLUSH_DW | 1);
                        OUT_BATCH(0); /* reserved */
                        OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
                                  I915_GEM_DOMAIN_RENDER, 0);
                        OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
                        ADVANCE_BATCH();
                }
                intel_batchbuffer_flush_on_ring(batch, ring);
        }

        drm_intel_bo_map(target_buffer, 0); /* map to force waiting on rendering */
        drm_intel_bo_unmap(target_buffer);
}
static void dummy_reloc_loop_random_ring(int num_rings)
{
        int i;

        srandom(0xdeadbeef);

        for (i = 0; i < 0x100000; i++) {
                int ring = random() % num_rings + 1;

                BEGIN_BATCH(4, 1);
                if (ring == I915_EXEC_RENDER) {
                        OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
                        OUT_BATCH(0xffffffff); /* compare dword */
                        OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
                                  I915_GEM_DOMAIN_RENDER, 0);
                        OUT_BATCH(MI_NOOP);
                } else {
                        OUT_BATCH(MI_FLUSH_DW | 1);
                        OUT_BATCH(0); /* reserved */
                        OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
                                  I915_GEM_DOMAIN_RENDER, 0);
                        OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
                }
                ADVANCE_BATCH();
                intel_batchbuffer_flush_on_ring(batch, ring);

                drm_intel_bo_map(target_buffer, 0); /* map to force waiting on rendering */
                drm_intel_bo_unmap(target_buffer);
        }
}
/* XXX: Thread safety? */
void *
intel_region_map(struct intel_context *intel, struct intel_region *region,
                 GLbitfield mode)
{
   /* We have the region->map_refcount controlling mapping of the BO because
    * in software fallbacks we may end up mapping the same buffer multiple
    * times on Mesa's behalf, so we refcount our mappings to make sure that
    * the pointer stays valid until the end of the unmap chain.  However, we
    * must not emit any batchbuffers between the start of mapping and the end
    * of unmapping, or further use of the map will be incoherent with the GPU
    * rendering done by that batchbuffer.  Hence we assert in
    * intel_batchbuffer_flush() that that doesn't happen, which means that the
    * flush is only needed on first map of the buffer.
    */
   _DBG("%s %p\n", __FUNCTION__, region);

   if (!region->map_refcount) {
      intel_flush(&intel->ctx);

      if (region->tiling != I915_TILING_NONE)
         drm_intel_gem_bo_map_gtt(region->bo);
      else
         drm_intel_bo_map(region->bo, true);

      region->map = region->bo->virtual;
   }
   region->map_refcount++;

   return region->map;
}
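/* Sketch of the nested-mapping pattern the refcount above supports
 * (illustrative only; intel_region_unmap() is assumed here to decrement
 * map_refcount and to actually unmap the BO when the count reaches zero). */
static void
software_fallback_sketch(struct intel_context *intel,
                         struct intel_region *region)
{
   void *a = intel_region_map(intel, region, GL_MAP_READ_BIT);
   void *b = intel_region_map(intel, region, GL_MAP_WRITE_BIT); /* same pointer */

   /* ... CPU reads/writes through a and b; no batchbuffer may be flushed
    * between the first map and the last unmap ... */
   (void) a;
   (void) b;

   intel_region_unmap(intel, region); /* refcount drops, mapping stays valid */
   intel_region_unmap(intel, region); /* last unmap releases the mapping */
}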
/**
 * Map a buffer object; issue performance warnings if mapping causes stalls.
 *
 * This matches the drm_intel_bo_map API, but takes an additional human-readable
 * name for the buffer object to use in the performance debug message.
 */
int
brw_bo_map(struct brw_context *brw,
           drm_intel_bo *bo, int write_enable,
           const char *bo_name)
{
   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
      return drm_intel_bo_map(bo, write_enable);

   float start_time = get_time();

   int ret = drm_intel_bo_map(bo, write_enable);

   perf_debug("CPU mapping a busy %s BO stalled and took %.03f ms.\n",
              bo_name, (get_time() - start_time) * 1000);

   return ret;
}
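/* Usage sketch (illustrative, not from the source above): read back results
 * from a hypothetical results_bo; the "query object" string only labels a
 * potential stall in the perf_debug output. */
static void
read_results(struct brw_context *brw, drm_intel_bo *results_bo)
{
   if (brw_bo_map(brw, results_bo, false /* read-only */, "query object") == 0) {
      const uint64_t *results = results_bo->virtual;
      /* ... consume results ... */
      (void) results;
      drm_intel_bo_unmap(results_bo);
   }
}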
/**
 * Called via glMapBufferARB().
 */
static void *
intel_bufferobj_map(struct gl_context * ctx,
                    GLenum target,
                    GLenum access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   GLboolean read_only = (access == GL_READ_ONLY_ARB);
   GLboolean write_only = (access == GL_WRITE_ONLY_ARB);

   assert(intel_obj);

   if (intel_obj->sys_buffer) {
      if (!read_only && intel_obj->source) {
         release_buffer(intel_obj);
      }

      if (!intel_obj->buffer || intel_obj->source) {
         obj->Pointer = intel_obj->sys_buffer;
         obj->Length = obj->Size;
         obj->Offset = 0;
         return obj->Pointer;
      }

      free(intel_obj->sys_buffer);
      intel_obj->sys_buffer = NULL;
   }

   /* Flush any existing batchbuffer that might reference this data. */
   if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer))
      intel_flush(ctx);

   if (intel_obj->region)
      intel_bufferobj_cow(intel, intel_obj);

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   if (write_only) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_obj->mapped_gtt = GL_TRUE;
   } else {
      drm_intel_bo_map(intel_obj->buffer, !read_only);
      intel_obj->mapped_gtt = GL_FALSE;
   }

   obj->Pointer = intel_obj->buffer->virtual;
   obj->Length = obj->Size;
   obj->Offset = 0;

   return obj->Pointer;
}
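/* Client-side sketch (illustrative): a GL_WRITE_ONLY glMapBuffer() call
 * reaches the write_only path above and gets a GTT mapping, while
 * GL_READ_ONLY would take the CPU-map path.  The buffer name and size are
 * hypothetical. */
static void
fill_buffer(GLuint vbo, const void *data, GLsizeiptr size)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);
   void *p = glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
   memcpy(p, data, size);
   glUnmapBuffer(GL_ARRAY_BUFFER);
}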
void *
intel_bo_map(struct intel_bo *bo, bool write_enable)
{
   int err;

   err = drm_intel_bo_map(gem_bo(bo), write_enable);
   if (err) {
      debug_error("failed to map bo");
      return NULL;
   }

   return gem_bo(bo)->virtual;
}
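/* Minimal usage sketch (illustrative): map a BO for writing, clear it, then
 * unmap.  intel_bo_unmap() is assumed to be the matching unmap entry point
 * in this winsys, and bo_size is a caller-known allocation size. */
static void
clear_bo(struct intel_bo *bo, size_t bo_size)
{
   void *ptr = intel_bo_map(bo, true /* write_enable */);
   if (ptr) {
      memset(ptr, 0, bo_size);
      intel_bo_unmap(bo);
   }
}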
static int
intel_image_write(__DRIimage *image, const void *buf, size_t count)
{
   if (image->region->map_refcount)
      return -1;
   if (!(image->usage & __DRI_IMAGE_USE_WRITE))
      return -1;

   drm_intel_bo_map(image->region->bo, true);
   memcpy(image->region->bo->virtual, buf, count);
   drm_intel_bo_unmap(image->region->bo);

   return 0;
}
static void scratch_buf_write_to_png(struct igt_buf *buf, const char *filename)
{
        cairo_surface_t *surface;
        cairo_status_t ret;

        drm_intel_bo_map(buf->bo, 0);
        surface = cairo_image_surface_create_for_data(buf->bo->virtual,
                                                      CAIRO_FORMAT_RGB24,
                                                      igt_buf_width(buf),
                                                      igt_buf_height(buf),
                                                      buf->stride);
        ret = cairo_surface_write_to_png(surface, filename);
        igt_assert(ret == CAIRO_STATUS_SUCCESS);
        cairo_surface_destroy(surface);
        drm_intel_bo_unmap(buf->bo);
}
static void
intel_batchbuffer_reset(struct brw_context *brw)
{
   if (brw->batch.last_bo != NULL) {
      drm_intel_bo_unreference(brw->batch.last_bo);
      brw->batch.last_bo = NULL;
   }
   brw->batch.last_bo = brw->batch.bo;

   brw_render_cache_set_clear(brw);

   brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
                                      BATCH_SZ, 4096);
   if (brw->has_llc) {
      drm_intel_bo_map(brw->batch.bo, true);
      brw->batch.map = brw->batch.bo->virtual;
   }
static void
do_batch_dump(struct intel_context *intel)
{
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &intel->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode,
                                         batch->bo->virtual,
                                         batch->bo->offset,
                                         batch->used);
   } else {
int
drm_intel_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
                         unsigned long size, void *data)
{
        int ret;

        if (bo->bufmgr->bo_get_subdata)
                return bo->bufmgr->bo_get_subdata(bo, offset, size, data);

        if (size == 0 || data == NULL)
                return 0;

        ret = drm_intel_bo_map(bo, 0);
        if (ret)
                return ret;

        memcpy(data, (unsigned char *)bo->virtual + offset, size);
        drm_intel_bo_unmap(bo);

        return 0;
}
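/* Usage sketch (illustrative): read 64 bytes back starting at offset 256 of
 * a BO.  When the buffer manager provides no bo_get_subdata hook, the call
 * above degenerates to map / memcpy / unmap as shown in the fallback path. */
static int
read_back(drm_intel_bo *bo)
{
        uint32_t data[16];
        int ret = drm_intel_bo_get_subdata(bo, 256, sizeof(data), data);

        if (ret)
                fprintf(stderr, "readback failed: %d\n", ret);
        return ret;
}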
void *
intel_miptree_map_raw(struct intel_context *intel,
                      struct intel_mipmap_tree *mt)
{
   drm_intel_bo *bo = mt->region->bo;

   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
      if (drm_intel_bo_busy(bo)) {
         perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
      }
   }

   intel_flush(&intel->ctx);

   if (mt->region->tiling != I915_TILING_NONE)
      drm_intel_gem_bo_map_gtt(bo);
   else
      drm_intel_bo_map(bo, true);

   return bo->virtual;
}
static void init_buffer(struct scratch_buf *buf, unsigned size)
{
        buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
        assert(buf->bo);
        buf->tiling = I915_TILING_NONE;
        buf->stride = 4096;

        sanitize_stride(buf);

        if (options.no_hw)
                buf->data = malloc(size);
        else {
                if (options.use_cpu_maps)
                        drm_intel_bo_map(buf->bo, 1);
                else
                        drm_intel_gem_bo_map_gtt(buf->bo);
                buf->data = buf->bo->virtual;
        }

        buf->num_tiles = options.tiles_per_buf;
}
static void
dummy_reloc_loop(void)
{
        int i, j;

        for (i = 0; i < 0x800; i++) {
                BEGIN_BATCH(8);
                OUT_BATCH(XY_SRC_COPY_BLT_CMD |
                          XY_SRC_COPY_BLT_WRITE_ALPHA |
                          XY_SRC_COPY_BLT_WRITE_RGB);
                OUT_BATCH((3 << 24) | /* 32 bits */
                          (0xcc << 16) | /* copy ROP */
                          4*4096);
                OUT_BATCH(2048 << 16 | 0);
                OUT_BATCH((4096) << 16 | (2048));
                OUT_RELOC_FENCED(blt_bo, I915_GEM_DOMAIN_RENDER,
                                 I915_GEM_DOMAIN_RENDER, 0);
                OUT_BATCH(0 << 16 | 0);
                OUT_BATCH(4*4096);
                OUT_RELOC_FENCED(blt_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
                ADVANCE_BATCH();
                intel_batchbuffer_flush(batch);

                BEGIN_BATCH(4);
                OUT_BATCH(MI_FLUSH_DW | 1);
                OUT_BATCH(0); /* reserved */
                OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
                          I915_GEM_DOMAIN_RENDER, 0);
                OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
                ADVANCE_BATCH();
                intel_batchbuffer_flush(batch);

                drm_intel_bo_map(target_buffer, 0); /* map to force completion */
                drm_intel_bo_unmap(target_buffer);
        }
}
/**
 * Called via glMapBufferRange().
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother flushing the
 * batchbuffer before mapping the buffer, which can save blocking in many
 * cases.  If we would still block, and they allow the whole buffer to be
 * invalidated, then just allocate a new buffer to replace the old one.  If
 * not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(GLcontext * ctx,
                          GLenum target, GLintptr offset,
                          GLsizeiptr length,
                          GLbitfield access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

   if (intel_obj->sys_buffer) {
      obj->Pointer = intel_obj->sys_buffer + offset;
      return obj->Pointer;
   }

   if (intel_obj->region)
      intel_bufferobj_cow(intel, intel_obj);

   /* If the mapping is synchronized with other GL operations, flush
    * the batchbuffer so that GEM knows about the buffer access for later
    * syncing.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
      intelFlush(ctx);

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the user doesn't care about existing buffer contents and mapping
    * would cause us to block, then throw out the old buffer.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      drm_intel_bo_unreference(intel_obj->buffer);
      intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
                                       intel_obj->Base.Size, 64);
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
         intel_obj->range_map_buffer = _mesa_malloc(length);
         obj->Pointer = intel_obj->range_map_buffer;
      } else {
         intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "range map",
                                                      length, 64);
         if (!(access & GL_MAP_READ_BIT) &&
             intel->intelScreen->kernel_exec_fencing) {
            drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
            intel_obj->mapped_gtt = GL_TRUE;
         } else {
            drm_intel_bo_map(intel_obj->range_map_bo,
                             (access & GL_MAP_WRITE_BIT) != 0);
            intel_obj->mapped_gtt = GL_FALSE;
         }
         obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

   if (!(access & GL_MAP_READ_BIT) &&
       intel->intelScreen->kernel_exec_fencing) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_obj->mapped_gtt = GL_TRUE;
   } else {
      drm_intel_bo_map(intel_obj->buffer,
                       (access & GL_MAP_WRITE_BIT) != 0);
      intel_obj->mapped_gtt = GL_FALSE;
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}
void *genode_map_image(__DRIimage *image)
{
        /* map read only */
        drm_intel_bo_map(image->bo, false);

        return image->bo->virtual;
}
/**
 * Called via glMapBufferRange and glMapBuffer
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother flushing the
 * batchbuffer before mapping the buffer, which can save blocking in many
 * cases.  If we would still block, and they allow the whole buffer to be
 * invalidated, then just allocate a new buffer to replace the old one.  If
 * not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
                          GLintptr offset, GLsizeiptr length,
                          GLbitfield access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

   if (intel_obj->sys_buffer) {
      const bool read_only =
         (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;

      if (!read_only && intel_obj->source)
         release_buffer(intel_obj);

      if (!intel_obj->buffer || intel_obj->source) {
         obj->Pointer = intel_obj->sys_buffer + offset;
         return obj->Pointer;
      }

      free(intel_obj->sys_buffer);
      intel_obj->sys_buffer = NULL;
   }

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the access is synchronized (like a normal buffer mapping), then get
    * things flushed out so the later mapping syncs appropriately through GEM.
    * If the user doesn't care about existing buffer contents and mapping would
    * cause us to block, then throw out the old buffer.
    *
    * If they set INVALIDATE_BUFFER, we can pitch the current contents to
    * achieve the required synchronization.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
      if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
         if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
            drm_intel_bo_unreference(intel_obj->buffer);
            intel_bufferobj_alloc_buffer(intel, intel_obj);
         } else {
            perf_debug("Stalling on the GPU for mapping a busy buffer "
                       "object\n");
            intel_flush(ctx);
         }
      } else if (drm_intel_bo_busy(intel_obj->buffer) &&
                 (access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(intel, intel_obj);
      }
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
         intel_obj->range_map_buffer = malloc(length);
         obj->Pointer = intel_obj->range_map_buffer;
      } else {
         intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "range map",
                                                      length, 64);
         if (!(access & GL_MAP_READ_BIT)) {
            drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
         } else {
            drm_intel_bo_map(intel_obj->range_map_bo,
                             (access & GL_MAP_WRITE_BIT) != 0);
         }
         obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

   if (access & GL_MAP_UNSYNCHRONIZED_BIT)
      drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
   else if (!(access & GL_MAP_READ_BIT)) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
   } else {
      drm_intel_bo_map(intel_obj->buffer,
                       (access & GL_MAP_WRITE_BIT) != 0);
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}
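/* Application-side sketch of the mapping paths the comment above describes
 * (illustrative client code, not part of the driver; buffer names and sizes
 * are hypothetical).  INVALIDATE_BUFFER lets the driver swap in a fresh BO
 * instead of stalling, while UNSYNCHRONIZED skips the flush and stall
 * entirely; INVALIDATE_RANGE on a busy BO lands in a temporary BO that is
 * copied back at unmap time. */
static void
stream_vertices(GLuint vbo, const void *verts, GLsizeiptr size,
                const void *more, GLsizeiptr chunk, GLintptr tail)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);

   /* Whole-buffer orphaning: the driver may allocate a fresh BO. */
   void *p = glMapBufferRange(GL_ARRAY_BUFFER, 0, size,
                              GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
   memcpy(p, verts, size);
   glUnmapBuffer(GL_ARRAY_BUFFER);

   /* Unsynchronized append: the caller guarantees the range is not in flight. */
   p = glMapBufferRange(GL_ARRAY_BUFFER, tail, chunk,
                        GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
   memcpy(p, more, chunk);
   glUnmapBuffer(GL_ARRAY_BUFFER);
}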