bool
os_wait_until_zero(volatile int *var, uint64_t timeout)
{
   if (!p_atomic_read(var))
      return true;

   if (!timeout)
      return false;

   if (timeout == PIPE_TIMEOUT_INFINITE) {
      while (p_atomic_read(var)) {
#if defined(PIPE_OS_UNIX)
         sched_yield();
#endif
      }
      return true;
   } else {
      int64_t start_time = os_time_get_nano();
      int64_t end_time = start_time + timeout;

      while (p_atomic_read(var)) {
         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
            return false;

#if defined(PIPE_OS_UNIX)
         sched_yield();
#endif
      }
      return true;
   }
}
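/* A brief usage sketch of the polling helper above.  The "pending_jobs"
 * counter and the 1 ms budget are illustrative assumptions, not from the
 * original source: the helper simply spins (yielding on Unix) until the
 * counter reaches zero or the relative timeout, in nanoseconds, expires.
 */
static volatile int pending_jobs;

static bool
drain_jobs(void)
{
   /* Wait up to 1 ms for workers (which call p_atomic_dec elsewhere)
    * to bring the counter down to zero. */
   return os_wait_until_zero(&pending_jobs, 1000000);
}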
void
radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs)
{
retry:
   pipe_mutex_lock(ws->cs_stack_lock);
   if (p_atomic_read(&ws->ncs) >= RING_LAST) {
      /* no room left for a flush */
      pipe_mutex_unlock(ws->cs_stack_lock);
      goto retry;
   }
   ws->cs_stack[p_atomic_read(&ws->ncs)] = cs;
   p_atomic_inc(&ws->ncs);
   pipe_mutex_unlock(ws->cs_stack_lock);
   pipe_semaphore_signal(&ws->cs_queued);
}
static void
si_decompress_textures(struct si_context *sctx, int shader_start, int shader_end)
{
   unsigned compressed_colortex_counter;

   if (sctx->blitter->running)
      return;

   /* Update the compressed_colortex_mask if necessary. */
   compressed_colortex_counter =
      p_atomic_read(&sctx->screen->b.compressed_colortex_counter);
   if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) {
      sctx->b.last_compressed_colortex_counter = compressed_colortex_counter;
      si_update_compressed_colortex_masks(sctx);
   }

   /* Flush depth textures which need to be flushed. */
   for (int i = shader_start; i < shader_end; i++) {
      if (sctx->samplers[i].depth_texture_mask) {
         si_flush_depth_textures(sctx, &sctx->samplers[i]);
      }
      if (sctx->samplers[i].compressed_colortex_mask) {
         si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
      }
      if (sctx->images[i].compressed_colortex_mask) {
         si_decompress_image_color_textures(sctx, &sctx->images[i]);
      }
   }
}
static void
svga_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf)
{
   struct svga_screen *ss = svga_screen(screen);
   struct svga_buffer *sbuf = svga_buffer(buf);

   assert(!p_atomic_read(&buf->reference.count));
   assert(!sbuf->dma.pending);

   if (sbuf->handle)
      svga_buffer_destroy_host_surface(ss, sbuf);

   if (sbuf->uploaded.buffer)
      pipe_resource_reference(&sbuf->uploaded.buffer, NULL);

   if (sbuf->hwbuf)
      svga_buffer_destroy_hw_storage(ss, sbuf);

   if (sbuf->swbuf && !sbuf->user)
      align_free(sbuf->swbuf);

   FREE(sbuf);
}
static boolean
vmw_svga_winsys_surface_is_flushed(struct svga_winsys_screen *sws,
                                   struct svga_winsys_surface *surface)
{
   struct vmw_svga_winsys_surface *vsurf = vmw_svga_winsys_surface(surface);
   return (p_atomic_read(&vsurf->validated) == 0);
}
/**
 * Re-validate the framebuffer.
 */
void
vg_manager_validate_framebuffer(struct vg_context *ctx)
{
   struct st_framebuffer *stfb = ctx->draw_buffer;
   struct pipe_resource *pt;

   /* no binding surface */
   if (!stfb)
      return;

   if (!p_atomic_read(&ctx->draw_buffer_invalid))
      return;

   /* validate the fb */
   if (!stfb->iface->validate(stfb->iface, &stfb->strb_att, 1, &pt) || !pt)
      return;

   /*
    * unset draw_buffer_invalid first because vg_context_update_draw_buffer
    * will cause the framebuffer to be validated again because of a call to
    * vg_validate_state
    */
   p_atomic_set(&ctx->draw_buffer_invalid, FALSE);

   vg_context_update_draw_buffer(ctx, pt);
}
void
vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst,
                                  struct vmw_svga_winsys_surface *src)
{
   struct pipe_reference *src_ref;
   struct pipe_reference *dst_ref;
   struct vmw_svga_winsys_surface *dst;

   if (pdst == NULL || *pdst == src)
      return;

   dst = *pdst;

   src_ref = src ? &src->refcnt : NULL;
   dst_ref = dst ? &dst->refcnt : NULL;

   if (pipe_reference(dst_ref, src_ref)) {
      vmw_ioctl_surface_destroy(dst->screen, dst->sid);
#ifdef DEBUG
      /* to detect dangling pointers */
      assert(p_atomic_read(&dst->validated) == 0);
      dst->sid = SVGA3D_INVALID_ID;
#endif
      FREE(dst);
   }

   *pdst = src;
}
/**
 * Create a framebuffer from a manager interface.
 */
static struct st_framebuffer *
st_framebuffer_create(struct st_framebuffer_iface *stfbi)
{
   struct st_framebuffer *stfb;
   struct gl_config mode;
   gl_buffer_index idx;

   if (!stfbi)
      return NULL;

   stfb = CALLOC_STRUCT(st_framebuffer);
   if (!stfb)
      return NULL;

   st_visual_to_context_mode(stfbi->visual, &mode);
   _mesa_initialize_window_framebuffer(&stfb->Base, &mode);

   stfb->iface = stfbi;
   stfb->iface_stamp = p_atomic_read(&stfbi->stamp) - 1;

   /* add the color buffer */
   idx = stfb->Base._ColorDrawBufferIndexes[0];
   if (!st_framebuffer_add_renderbuffer(stfb, idx)) {
      free(stfb);
      return NULL;
   }

   st_framebuffer_add_renderbuffer(stfb, BUFFER_DEPTH);
   st_framebuffer_add_renderbuffer(stfb, BUFFER_ACCUM);

   stfb->stamp = 0;
   st_framebuffer_update_attachments(stfb);

   return stfb;
}
static void
svga_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf)
{
   struct svga_screen *ss = svga_screen(screen);
   struct svga_buffer *sbuf = svga_buffer(buf);

   assert(!p_atomic_read(&buf->reference.count));
   assert(!sbuf->dma.pending);

   if (sbuf->handle)
      svga_buffer_destroy_host_surface(ss, sbuf);

   if (sbuf->uploaded.buffer)
      pipe_resource_reference(&sbuf->uploaded.buffer, NULL);

   if (sbuf->hwbuf)
      svga_buffer_destroy_hw_storage(ss, sbuf);

   if (sbuf->swbuf && !sbuf->user)
      align_free(sbuf->swbuf);

   pipe_resource_reference(&sbuf->translated_indices.buffer, NULL);

   ss->hud.total_resource_bytes -= sbuf->size;
   assert(ss->hud.num_resources > 0);
   if (ss->hud.num_resources > 0)
      ss->hud.num_resources--;

   FREE(sbuf);
}
/**
 * Re-validate the framebuffer.
 */
void
vg_manager_validate_framebuffer(struct vg_context *ctx)
{
   struct st_framebuffer *stfb = ctx->draw_buffer;
   struct pipe_resource *pt;

   /* no binding surface */
   if (!stfb)
      return;

   if (!p_atomic_read(&ctx->draw_buffer_invalid))
      return;

   /* validate the fb */
   if (!stfb->iface->validate(stfb->iface, &stfb->strb_att, 1, &pt) || !pt)
      return;

   p_atomic_set(&ctx->draw_buffer_invalid, FALSE);

   if (vg_context_update_color_rb(ctx, pt) ||
       stfb->width != pt->width0 ||
       stfb->height != pt->height0)
      ctx->state.dirty |= FRAMEBUFFER_DIRTY;

   stfb->width = pt->width0;
   stfb->height = pt->height0;
}
static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
{
   struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)param;
   struct radeon_drm_cs *cs;
   unsigned i, empty_stack;

   while (1) {
      pipe_semaphore_wait(&ws->cs_queued);
      if (ws->kill_thread)
         break;

next:
      pipe_mutex_lock(ws->cs_stack_lock);
      cs = ws->cs_stack[0];
      pipe_mutex_unlock(ws->cs_stack_lock);

      if (cs) {
         radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);

         pipe_mutex_lock(ws->cs_stack_lock);
         for (i = 1; i < p_atomic_read(&ws->ncs); i++) {
            ws->cs_stack[i - 1] = ws->cs_stack[i];
         }
         ws->cs_stack[p_atomic_read(&ws->ncs) - 1] = NULL;
         empty_stack = p_atomic_dec_zero(&ws->ncs);
         if (empty_stack) {
            pipe_condvar_signal(ws->cs_queue_empty);
         }
         pipe_mutex_unlock(ws->cs_stack_lock);

         pipe_semaphore_signal(&cs->flush_completed);

         if (!empty_stack) {
            goto next;
         }
      }
   }

   pipe_mutex_lock(ws->cs_stack_lock);
   for (i = 0; i < p_atomic_read(&ws->ncs); i++) {
      pipe_semaphore_signal(&ws->cs_stack[i]->flush_completed);
      ws->cs_stack[i] = NULL;
   }
   p_atomic_set(&ws->ncs, 0);
   pipe_condvar_signal(ws->cs_queue_empty);
   pipe_mutex_unlock(ws->cs_stack_lock);

   return NULL;
}
/**
 * Dump the fenced buffer list.
 *
 * Useful to understand failures to allocate buffers.
 */
static void
fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
{
#ifdef DEBUG
   struct pb_fence_ops *ops = fenced_mgr->ops;
   struct list_head *curr, *next;
   struct fenced_buffer *fenced_buf;

   debug_printf("%10s %7s %8s %7s %10s %s\n",
                "buffer", "size", "refcount", "storage", "fence", "signalled");

   curr = fenced_mgr->unfenced.next;
   next = curr->next;
   while (curr != &fenced_mgr->unfenced) {
      fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
      assert(!fenced_buf->fence);
      debug_printf("%10p %7u %8u %7s\n",
                   (void *) fenced_buf,
                   fenced_buf->base.base.size,
                   p_atomic_read(&fenced_buf->base.base.reference.count),
                   fenced_buf->buffer ? "gpu" : (fenced_buf->data ? "cpu" : "none"));
      curr = next;
      next = curr->next;
   }

   curr = fenced_mgr->fenced.next;
   next = curr->next;
   while (curr != &fenced_mgr->fenced) {
      int signaled;
      fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
      assert(fenced_buf->buffer);
      signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
      debug_printf("%10p %7u %8u %7s %10p %s\n",
                   (void *) fenced_buf,
                   fenced_buf->base.base.size,
                   p_atomic_read(&fenced_buf->base.base.reference.count),
                   "gpu",
                   (void *) fenced_buf->fence,
                   signaled == 0 ? "y" : "n");
      curr = next;
      next = curr->next;
   }
#else
   (void)fenced_mgr;
#endif
}
/* Atomically add "add" to *v unless its current value equals "unless".
 * Returns nonzero if *v was left untouched because it equaled "unless",
 * zero if the addition was performed. */
static inline int
atomic_add_unless(int *v, int add, int unless)
{
   int c, old;
   c = p_atomic_read(v);
   while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
      c = old;
   return c == unless;
}
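/* Illustrative only: a minimal refcount wrapper around atomic_add_unless(),
 * mirroring the pattern used by iris_bo_unreference() later in this section.
 * The names here ("refcount", object_free, object_unref) are placeholders,
 * not Mesa API. */
static int refcount = 1;

static void
object_free(void)   /* placeholder for the real destructor */
{
}

static void
object_unref(void)
{
   /* Fast path: decrement lock-free unless this might be the last reference.
    * atomic_add_unless() returns true when refcount equaled 1, i.e. when it
    * refused to decrement; only then take the slow, serialized path. */
   if (atomic_add_unless(&refcount, -1, 1)) {
      /* Done under a lock in the real code; re-check atomically. */
      if (p_atomic_dec_zero(&refcount))
         object_free();
   }
}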
bool
os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout)
{
   if (!p_atomic_read(var))
      return true;

   if (timeout == PIPE_TIMEOUT_INFINITE)
      return os_wait_until_zero(var, PIPE_TIMEOUT_INFINITE);

   while (p_atomic_read(var)) {
      if (os_time_get_nano() >= timeout)
         return false;

#if defined(PIPE_OS_UNIX)
      sched_yield();
#endif
   }

   return true;
}
/**
 * Re-validate the framebuffer.
 */
void
vg_manager_validate_framebuffer(struct vg_context *ctx)
{
   struct st_framebuffer *stfb = ctx->draw_buffer;
   struct pipe_resource *pt;
   int32_t new_stamp;

   /* no binding surface */
   if (!stfb)
      return;

   new_stamp = p_atomic_read(&stfb->iface->stamp);
   if (stfb->iface_stamp != new_stamp) {
      do {
         /* validate the fb */
         if (!stfb->iface->validate((struct st_context_iface *)ctx,
                                    stfb->iface, &stfb->strb_att,
                                    1, &pt) || !pt)
            return;

         stfb->iface_stamp = new_stamp;
         new_stamp = p_atomic_read(&stfb->iface->stamp);
      } while (stfb->iface_stamp != new_stamp);

      if (vg_context_update_color_rb(ctx, pt) ||
          stfb->width != pt->width0 ||
          stfb->height != pt->height0)
         ++stfb->stamp;

      stfb->width = pt->width0;
      stfb->height = pt->height0;
   }

   if (ctx->draw_stamp != stfb->stamp) {
      ctx->state.dirty |= FRAMEBUFFER_DIRTY;
      ctx->draw_stamp = stfb->stamp;
   }
}
static int
thread_function(void *thread_data)
{
   int thread_id = *((int *) thread_data);

   LOG("thread %d starting\n", thread_id);
   os_time_sleep(thread_id * 100 * 1000);
   LOG("thread %d before barrier\n", thread_id);

   CHECK(p_atomic_read(&proceeded) == 0);
   p_atomic_inc(&waiting);

   pipe_barrier_wait(&barrier);

   CHECK(p_atomic_read(&waiting) == NUM_THREADS);
   p_atomic_inc(&proceeded);

   LOG("thread %d exiting\n", thread_id);

   return 0;
}
static void
st_viewport(struct gl_context *ctx)
{
   struct st_context *st = ctx->st;
   struct st_framebuffer *stdraw;
   struct st_framebuffer *stread;

   if (!st->invalidate_on_gl_viewport)
      return;

   /*
    * Normally we'd want the state tracker manager to mark the drawables
    * invalid only when needed. This will force the state tracker manager
    * to revalidate the drawable, rather than just update the context with
    * the latest cached drawable info.
    */
   stdraw = st_ws_framebuffer(st->ctx->DrawBuffer);
   stread = st_ws_framebuffer(st->ctx->ReadBuffer);

   if (stdraw)
      stdraw->iface_stamp = p_atomic_read(&stdraw->iface->stamp) - 1;
   if (stread && stread != stdraw)
      stread->iface_stamp = p_atomic_read(&stread->iface->stamp) - 1;
}
/**
 * Add a color renderbuffer on demand.
 */
boolean
st_manager_add_color_renderbuffer(struct st_context *st,
                                  struct gl_framebuffer *fb,
                                  gl_buffer_index idx)
{
   struct st_framebuffer *stfb = st_ws_framebuffer(fb);

   /* FBO */
   if (!stfb)
      return FALSE;

   if (stfb->Base.Attachment[idx].Renderbuffer)
      return TRUE;

   switch (idx) {
   case BUFFER_FRONT_LEFT:
   case BUFFER_BACK_LEFT:
   case BUFFER_FRONT_RIGHT:
   case BUFFER_BACK_RIGHT:
      break;
   default:
      return FALSE;
   }

   if (!st_framebuffer_add_renderbuffer(stfb, idx))
      return FALSE;

   st_framebuffer_update_attachments(stfb);

   /*
    * Force a call to the state tracker manager to validate the
    * new renderbuffer. It might be that there is a window system
    * renderbuffer available.
    */
   if (stfb->iface)
      stfb->iface_stamp = p_atomic_read(&stfb->iface->stamp) - 1;

   st_invalidate_state(st->ctx, _NEW_BUFFERS);

   return TRUE;
}
void
svga_context_flush_buffers(struct svga_context *svga)
{
   struct list_head *curr, *next;
   struct svga_buffer *sbuf;

   curr = svga->dirty_buffers.next;
   next = curr->next;
   while (curr != &svga->dirty_buffers) {
      sbuf = LIST_ENTRY(struct svga_buffer, curr, head);

      assert(p_atomic_read(&sbuf->b.b.reference.count) != 0);
      assert(sbuf->dma.pending);

      svga_buffer_upload_flush(svga, sbuf);

      curr = next;
      next = curr->next;
   }
}
static void
radeon_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage)
{
   struct radeon_bo *bo = get_radeon_bo(_buf);

   while (p_atomic_read(&bo->num_active_ioctls)) {
      sched_yield();
   }

   if (bo->rws->info.drm_minor >= 12) {
      struct drm_radeon_gem_wait args = {};
      args.handle = bo->handle;
      args.flags = usage;
      while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT,
                                 &args, sizeof(args)) == -EBUSY);
   } else {
      struct drm_radeon_gem_wait_idle args = {};
      args.handle = bo->handle;
      while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                                 &args, sizeof(args)) == -EBUSY);
   }
}
static boolean
radeon_bo_is_busy(struct pb_buffer *_buf, enum radeon_bo_usage usage)
{
   struct radeon_bo *bo = get_radeon_bo(_buf);

   if (p_atomic_read(&bo->num_active_ioctls)) {
      return TRUE;
   }

   if (bo->rws->info.drm_minor >= 12) {
      struct drm_radeon_gem_wait args = {};
      args.handle = bo->handle;
      args.flags = usage | RADEON_GEM_NO_WAIT;
      return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT,
                                 &args, sizeof(args)) != 0;
   } else {
      struct drm_radeon_gem_busy args = {};
      args.handle = bo->handle;
      return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                                 &args, sizeof(args)) != 0;
   }
}
int
main(int argc, char *argv[])
{
   int i;

   for (i = 1; i < argc; ++i) {
      const char *arg = argv[i];
      if (strcmp(arg, "-v") == 0) {
         ++verbosity;
      } else {
         fprintf(stderr, "error: unrecognized option `%s`\n", arg);
         exit(EXIT_FAILURE);
      }
   }

   // Disable buffering
   setbuf(stdout, NULL);

   LOG("pipe_barrier_test starting\n");

   pipe_barrier_init(&barrier, NUM_THREADS);

   for (i = 0; i < NUM_THREADS; i++) {
      thread_ids[i] = i;
      threads[i] = u_thread_create(thread_function, (void *) &thread_ids[i]);
   }

   for (i = 0; i < NUM_THREADS; i++) {
      thrd_join(threads[i], NULL);
   }

   CHECK(p_atomic_read(&proceeded) == NUM_THREADS);

   pipe_barrier_destroy(&barrier);

   LOG("pipe_barrier_test exiting\n");

   return 0;
}
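/* The barrier test above references LOG, CHECK, and several globals that are
 * not shown in this excerpt.  A minimal sketch of plausible supporting
 * definitions follows; these are assumptions for self-containedness, not the
 * original file's exact declarations. */
#define NUM_THREADS 10

static int verbosity = 0;

static thrd_t threads[NUM_THREADS];
static int thread_ids[NUM_THREADS];
static pipe_barrier barrier;
static int proceeded = 0;
static int waiting = 0;

/* Print only when -v was given on the command line. */
#define LOG(...) do { if (verbosity > 0) printf(__VA_ARGS__); } while (0)

/* Abort loudly if a test invariant does not hold. */
#define CHECK(_cond) do { \
      if (!(_cond)) { \
         fprintf(stderr, "%s:%u: check failed: " #_cond "\n", \
                 __FILE__, __LINE__); \
         abort(); \
      } \
   } while (0)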
static void
radeon_bo_set_tiling(struct pb_buffer *_buf,
                     struct radeon_winsys_cs *rcs,
                     enum radeon_bo_layout microtiled,
                     enum radeon_bo_layout macrotiled,
                     uint32_t pitch)
{
   struct radeon_bo *bo = get_radeon_bo(_buf);
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct drm_radeon_gem_set_tiling args = {};

   /* Tiling determines how DRM treats the buffer data.
    * We must flush CS when changing it if the buffer is referenced. */
   if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
      cs->flush_cs(cs->flush_data, 0);
   }

   while (p_atomic_read(&bo->num_active_ioctls)) {
      sched_yield();
   }

   if (microtiled == RADEON_LAYOUT_TILED)
      args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE;
   else if (microtiled == RADEON_LAYOUT_SQUARETILED)
      args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE_SQUARE;

   if (macrotiled == RADEON_LAYOUT_TILED)
      args.tiling_flags |= RADEON_BO_FLAGS_MACRO_TILE;

   args.handle = bo->handle;
   args.pitch = pitch;

   drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_SET_TILING,
                       &args, sizeof(args));
}
/**
 * Create a framebuffer from a manager interface.
 */
static struct st_framebuffer *
st_framebuffer_create(struct st_context *st,
                      struct st_framebuffer_iface *stfbi)
{
   struct st_framebuffer *stfb;
   struct gl_config mode;
   gl_buffer_index idx;

   if (!stfbi)
      return NULL;

   stfb = CALLOC_STRUCT(st_framebuffer);
   if (!stfb)
      return NULL;

   st_visual_to_context_mode(stfbi->visual, &mode);

   /*
    * For desktop GL, sRGB framebuffer write is controlled by both the
    * capability of the framebuffer and GL_FRAMEBUFFER_SRGB.  We should
    * advertise the capability when the pipe driver (and core Mesa) supports
    * it so that applications can enable sRGB write when they want to.
    *
    * This is not to be confused with GLX_FRAMEBUFFER_SRGB_CAPABLE_ARB.  When
    * the attribute is GLX_TRUE, it tells the st manager to pick a color
    * format such that util_format_srgb(visual->color_format) can be supported
    * by the pipe driver.  We still need to advertise the capability here.
    *
    * For GLES, however, sRGB framebuffer write is controlled only by the
    * capability of the framebuffer.  There is GL_EXT_sRGB_write_control to
    * give applications the control back, but sRGB write is still enabled by
    * default.  To avoid unexpected results, we should not advertise the
    * capability.  This could change when we add support for
    * EGL_KHR_gl_colorspace.
    */
   if (_mesa_is_desktop_gl(st->ctx)) {
      struct pipe_screen *screen = st->pipe->screen;
      const enum pipe_format srgb_format =
         util_format_srgb(stfbi->visual->color_format);

      if (srgb_format != PIPE_FORMAT_NONE &&
          st_pipe_format_to_mesa_format(srgb_format) != MESA_FORMAT_NONE &&
          screen->is_format_supported(screen, srgb_format,
                                      PIPE_TEXTURE_2D,
                                      stfbi->visual->samples,
                                      PIPE_BIND_RENDER_TARGET))
         mode.sRGBCapable = GL_TRUE;
   }

   _mesa_initialize_window_framebuffer(&stfb->Base, &mode);

   stfb->iface = stfbi;
   stfb->iface_stamp = p_atomic_read(&stfbi->stamp) - 1;

   /* add the color buffer */
   idx = stfb->Base._ColorDrawBufferIndexes[0];
   if (!st_framebuffer_add_renderbuffer(stfb, idx)) {
      free(stfb);
      return NULL;
   }

   st_framebuffer_add_renderbuffer(stfb, BUFFER_DEPTH);
   st_framebuffer_add_renderbuffer(stfb, BUFFER_ACCUM);

   stfb->stamp = 0;
   st_framebuffer_update_attachments(stfb);

   return stfb;
}
/**
 * Validate a framebuffer to make sure up-to-date pipe_textures are used.
 * The context we need to pass in is a dummy context needed only to be
 * able to get a pipe context to create pipe surfaces, and to have a
 * context to call _mesa_resize_framebuffer():
 * (That should probably be rethought, since those surfaces become
 * drawable state, not context state, and can be freed by another pipe
 * context).
 */
static void
st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st)
{
   struct pipe_resource *textures[ST_ATTACHMENT_COUNT];
   uint width, height;
   unsigned i;
   boolean changed = FALSE;
   int32_t new_stamp = p_atomic_read(&stfb->iface->stamp);

   if (stfb->iface_stamp == new_stamp)
      return;

   /* validate the fb */
   do {
      if (!stfb->iface->validate(stfb->iface, stfb->statts,
                                 stfb->num_statts, textures))
         return;

      stfb->iface_stamp = new_stamp;
      new_stamp = p_atomic_read(&stfb->iface->stamp);
   } while (stfb->iface_stamp != new_stamp);

   width = stfb->Base.Width;
   height = stfb->Base.Height;

   for (i = 0; i < stfb->num_statts; i++) {
      struct st_renderbuffer *strb;
      struct pipe_surface *ps, surf_tmpl;
      gl_buffer_index idx;

      if (!textures[i])
         continue;

      idx = attachment_to_buffer_index(stfb->statts[i]);
      if (idx >= BUFFER_COUNT) {
         pipe_resource_reference(&textures[i], NULL);
         continue;
      }

      strb = st_renderbuffer(stfb->Base.Attachment[idx].Renderbuffer);
      assert(strb);
      if (strb->texture == textures[i]) {
         pipe_resource_reference(&textures[i], NULL);
         continue;
      }

      u_surface_default_template(&surf_tmpl, textures[i],
                                 PIPE_BIND_RENDER_TARGET);
      ps = st->pipe->create_surface(st->pipe, textures[i], &surf_tmpl);
      if (ps) {
         pipe_surface_reference(&strb->surface, ps);
         pipe_resource_reference(&strb->texture, ps->texture);
         /* ownership transferred */
         pipe_surface_reference(&ps, NULL);

         changed = TRUE;

         strb->Base.Width = strb->surface->width;
         strb->Base.Height = strb->surface->height;

         width = strb->Base.Width;
         height = strb->Base.Height;
      }

      pipe_resource_reference(&textures[i], NULL);
   }

   if (changed) {
      ++stfb->stamp;
      _mesa_resize_framebuffer(st->ctx, &stfb->Base, width, height);
   }
}
/**
 * Bind the context to the given framebuffers.
 */
static boolean
vg_context_bind_framebuffers(struct st_context_iface *stctxi,
                             struct st_framebuffer_iface *stdrawi,
                             struct st_framebuffer_iface *streadi)
{
   struct vg_context *ctx = (struct vg_context *) stctxi;
   struct st_framebuffer *stfb;
   enum st_attachment_type strb_att;

   /* the draw and read framebuffers must be the same */
   if (stdrawi != streadi)
      return FALSE;

   strb_att = (stdrawi) ? choose_attachment(stdrawi) : ST_ATTACHMENT_INVALID;

   if (ctx->draw_buffer) {
      stfb = ctx->draw_buffer;

      /* free the existing fb */
      if (!stdrawi ||
          stfb->strb_att != strb_att ||
          stfb->strb->format != stdrawi->visual->color_format) {
         destroy_renderbuffer(stfb->strb);
         destroy_renderbuffer(stfb->dsrb);
         FREE(stfb);

         ctx->draw_buffer = NULL;
      }
   }

   if (!stdrawi)
      return TRUE;

   if (strb_att == ST_ATTACHMENT_INVALID)
      return FALSE;

   /* create a new fb */
   if (!ctx->draw_buffer) {
      stfb = CALLOC_STRUCT(st_framebuffer);
      if (!stfb)
         return FALSE;

      stfb->strb = create_renderbuffer(stdrawi->visual->color_format);
      if (!stfb->strb) {
         FREE(stfb);
         return FALSE;
      }

      stfb->dsrb = create_renderbuffer(ctx->ds_format);
      if (!stfb->dsrb) {
         FREE(stfb->strb);
         FREE(stfb);
         return FALSE;
      }

      stfb->width = 0;
      stfb->height = 0;
      stfb->strb_att = strb_att;
      stfb->stamp = 1;
      stfb->iface_stamp = p_atomic_read(&stdrawi->stamp) - 1;

      ctx->draw_buffer = stfb;
   }

   ctx->draw_buffer->iface = stdrawi;
   ctx->draw_stamp = ctx->draw_buffer->stamp - 1;

   return TRUE;
}
static void
radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct radeon_cs_context *tmp;

   switch (cs->base.ring_type) {
   case RING_DMA:
      /* pad DMA ring to 8 DWs */
      if (cs->ws->info.chip_class <= SI) {
         while (rcs->cdw & 7)
            OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
      } else {
         while (rcs->cdw & 7)
            OUT_CS(&cs->base, 0x00000000); /* NOP packet */
      }
      break;
   case RING_GFX:
      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
       * r6xx requires at least 4 dw alignment to avoid a hw bug.
       */
      if (flags & RADEON_FLUSH_COMPUTE) {
         if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
               OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
         } else {
            while (rcs->cdw & 7)
               OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
         }
      } else {
         while (rcs->cdw & 7)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
      }
      break;
   }

   if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
      fprintf(stderr, "radeon: command stream overflowed\n");
   }

   radeon_drm_cs_sync_flush(rcs);

   /* Flip command streams. */
   tmp = cs->csc;
   cs->csc = cs->cst;
   cs->cst = tmp;

   cs->cst->cs_trace_id = cs_trace_id;

   /* If the CS is not empty or overflowed, emit it in a separate thread. */
   if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS &&
       !debug_get_option_noop()) {
      unsigned i, crelocs = cs->cst->crelocs;

      cs->cst->chunks[0].length_dw = cs->base.cdw;

      for (i = 0; i < crelocs; i++) {
         /* Update the number of active asynchronous CS ioctls for the buffer. */
         p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
      }

      switch (cs->base.ring_type) {
      case RING_DMA:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_DMA;
         cs->cst->cs.num_chunks = 3;
         if (cs->ws->info.r600_virtual_address) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
         }
         break;

      case RING_UVD:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_UVD;
         cs->cst->cs.num_chunks = 3;
         break;

      default:
      case RING_GFX:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_GFX;
         cs->cst->cs.num_chunks = 2;
         if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
            cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->cs.num_chunks = 3;
         }
         if (cs->ws->info.r600_virtual_address) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
            cs->cst->cs.num_chunks = 3;
         }
         if (flags & RADEON_FLUSH_END_OF_FRAME) {
            cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
            cs->cst->cs.num_chunks = 3;
         }
         if (flags & RADEON_FLUSH_COMPUTE) {
            cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
            cs->cst->cs.num_chunks = 3;
         }
         break;
      }

      if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) {
         cs->flush_started = 1;
         radeon_drm_ws_queue_cs(cs->ws, cs);
      } else {
         pipe_mutex_lock(cs->ws->cs_stack_lock);
         if (cs->ws->thread) {
            while (p_atomic_read(&cs->ws->ncs)) {
               pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock);
            }
         }
         pipe_mutex_unlock(cs->ws->cs_stack_lock);
         radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
      }
   } else {
      radeon_cs_context_cleanup(cs->cst);
   }

   /* Prepare a new CS. */
   cs->base.buf = cs->csc->buf;
   cs->base.cdw = 0;
}
static void *
radeon_bo_map_internal(struct pb_buffer *_buf, unsigned flags, void *flush_ctx)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct radeon_drm_cs *cs = flush_ctx;
   struct drm_radeon_gem_mmap args = {};
   void *ptr;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(flags & PB_USAGE_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (flags & PB_USAGE_DONTBLOCK) {
         if (!(flags & PB_USAGE_CPU_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write.
             */
            if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
               return NULL;
            }

            if (radeon_bo_is_busy((struct pb_buffer*)bo,
                                  RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
               return NULL;
            }

            if (radeon_bo_is_busy((struct pb_buffer*)bo,
                                  RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         if (!(flags & PB_USAGE_CPU_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write.
             */
            if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data, 0);
            }
            radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, 0);
            } else {
               /* Try to avoid busy-waiting in radeon_bo_wait. */
               if (p_atomic_read(&bo->num_active_ioctls))
                  radeon_drm_cs_sync_flush(cs);
            }

            radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
         }
      }
   }

   /* Return the pointer if it's already mapped. */
   if (bo->ptr)
      return bo->ptr;

   /* Map the buffer. */
   pipe_mutex_lock(bo->map_mutex);

   /* Return the pointer if it's already mapped (in case of a race). */
   if (bo->ptr) {
      pipe_mutex_unlock(bo->map_mutex);
      return bo->ptr;
   }

   args.handle = bo->handle;
   args.offset = 0;
   args.size = (uint64_t)bo->size;

   if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_MMAP,
                           &args, sizeof(args))) {
      pipe_mutex_unlock(bo->map_mutex);
      fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
              bo, bo->handle);
      return NULL;
   }

   ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
              bo->rws->fd, args.addr_ptr);
   if (ptr == MAP_FAILED) {
      pipe_mutex_unlock(bo->map_mutex);
      fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
      return NULL;
   }
   bo->ptr = ptr;
   pipe_mutex_unlock(bo->map_mutex);

   return bo->ptr;
}
void *
vmw_svga_winsys_surface_map(struct svga_winsys_context *swc,
                            struct svga_winsys_surface *srf,
                            unsigned flags, boolean *retry)
{
   struct vmw_svga_winsys_surface *vsrf = vmw_svga_winsys_surface(srf);
   void *data = NULL;
   struct pb_buffer *pb_buf;
   uint32_t pb_flags;
   struct vmw_winsys_screen *vws = vsrf->screen;

   *retry = FALSE;
   assert((flags & (PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE)) != 0);
   pipe_mutex_lock(vsrf->mutex);

   if (vsrf->mapcount) {
      /*
       * Only allow multiple readers to map.
       */
      if ((flags & PIPE_TRANSFER_WRITE) ||
          (vsrf->map_mode & PIPE_TRANSFER_WRITE))
         goto out_unlock;

      data = vsrf->data;
      goto out_mapped;
   }

   vsrf->rebind = FALSE;

   /*
    * If we intend to read, there's no point discarding the
    * data if busy.
    */
   if (flags & PIPE_TRANSFER_READ || vsrf->shared)
      flags &= ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;

   /*
    * Discard is a hint to a synchronized map.
    */
   if (flags & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
      flags &= ~PIPE_TRANSFER_UNSYNCHRONIZED;

   /*
    * The surface is allowed to be referenced on the command stream iff
    * we're mapping unsynchronized or discard. This is an early check.
    * We need to recheck after a failing discard map.
    */
   if (!(flags & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
                  PIPE_TRANSFER_UNSYNCHRONIZED)) &&
       p_atomic_read(&vsrf->validated)) {
      *retry = TRUE;
      goto out_unlock;
   }

   pb_flags = flags & (PIPE_TRANSFER_READ_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);

   if (flags & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
      struct pb_manager *provider;
      struct pb_desc desc;

      /*
       * First, if possible, try to map existing storage with DONTBLOCK.
       */
      if (!p_atomic_read(&vsrf->validated)) {
         data = vmw_svga_winsys_buffer_map(&vws->base, vsrf->buf,
                                           PIPE_TRANSFER_DONTBLOCK | pb_flags);
         if (data)
            goto out_mapped;
      }

      /*
       * Attempt to get a new buffer.
       */
      provider = vws->pools.mob_fenced;
      memset(&desc, 0, sizeof(desc));
      desc.alignment = 4096;
      pb_buf = provider->create_buffer(provider, vsrf->size, &desc);
      if (pb_buf != NULL) {
         struct svga_winsys_buffer *vbuf =
            vmw_svga_winsys_buffer_wrap(pb_buf);

         data = vmw_svga_winsys_buffer_map(&vws->base, vbuf, pb_flags);
         if (data) {
            vsrf->rebind = TRUE;
            /*
             * We've discarded data on this surface and thus
             * its data is no longer considered referenced.
             */
            vmw_swc_surface_clear_reference(swc, vsrf);
            if (vsrf->buf)
               vmw_svga_winsys_buffer_destroy(&vws->base, vsrf->buf);

            vsrf->buf = vbuf;
            goto out_mapped;
         } else
            vmw_svga_winsys_buffer_destroy(&vws->base, vbuf);
      }

      /*
       * We couldn't get and map a new buffer for some reason.
       * Fall through to an ordinary map.
       * But tell the pipe driver to flush now if the surface is already on
       * the validate list; otherwise we'll overwrite previous contents.
       */
      if (!(flags & PIPE_TRANSFER_UNSYNCHRONIZED) &&
          p_atomic_read(&vsrf->validated)) {
         *retry = TRUE;
         goto out_unlock;
      }
   }

   pb_flags |= (flags & PIPE_TRANSFER_DONTBLOCK);
   data = vmw_svga_winsys_buffer_map(&vws->base, vsrf->buf, pb_flags);
   if (data == NULL)
      goto out_unlock;

out_mapped:
   ++vsrf->mapcount;
   vsrf->data = data;
   vsrf->map_mode = flags & (PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE);
out_unlock:
   pipe_mutex_unlock(vsrf->mutex);
   return data;
}
int
iris_bo_busy(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };

   int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
   if (ret == 0) {
      bo->idle = !busy.busy;
      return busy.busy;
   }
   return false;
}

int
iris_bo_madvise(struct iris_bo *bo, int state)
{
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = state,
      .retained = 1,
   };

   drm_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

/* drop the oldest entries that have been purged by the kernel */
static void
iris_bo_cache_purge_bucket(struct iris_bufmgr *bufmgr,
                           struct bo_cache_bucket *bucket)
{
   list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
      if (iris_bo_madvise(bo, I915_MADV_DONTNEED))
         break;

      list_del(&bo->head);
      bo_free(bo);
   }
}

static struct iris_bo *
bo_calloc(void)
{
   struct iris_bo *bo = calloc(1, sizeof(*bo));
   if (bo) {
      bo->hash = _mesa_hash_pointer(bo);
   }
   return bo;
}

static struct iris_bo *
bo_alloc_internal(struct iris_bufmgr *bufmgr,
                  const char *name,
                  uint64_t size,
                  enum iris_memory_zone memzone,
                  unsigned flags,
                  uint32_t tiling_mode,
                  uint32_t stride)
{
   struct iris_bo *bo;
   unsigned int page_size = getpagesize();
   int ret;
   struct bo_cache_bucket *bucket;
   bool alloc_from_cache;
   uint64_t bo_size;
   bool zeroed = false;

   if (flags & BO_ALLOC_ZEROED)
      zeroed = true;

   if ((flags & BO_ALLOC_COHERENT) && !bufmgr->has_llc) {
      bo_size = MAX2(ALIGN(size, page_size), page_size);
      bucket = NULL;
      goto skip_cache;
   }

   /* Round the allocated size up to a power of two number of pages. */
   bucket = bucket_for_size(bufmgr, size);

   /* If we don't have caching at this size, don't actually round the
    * allocation up.
    */
   if (bucket == NULL) {
      bo_size = MAX2(ALIGN(size, page_size), page_size);
   } else {
      bo_size = bucket->size;
   }

   mtx_lock(&bufmgr->lock);

   /* Get a buffer out of the cache if available */
retry:
   alloc_from_cache = false;
   if (bucket != NULL && !list_empty(&bucket->head)) {
      /* If the last BO in the cache is idle, then reuse it.  Otherwise,
       * allocate a fresh buffer to avoid stalling.
       */
      bo = LIST_ENTRY(struct iris_bo, bucket->head.next, head);
      if (!iris_bo_busy(bo)) {
         alloc_from_cache = true;
         list_del(&bo->head);
      }

      if (alloc_from_cache) {
         if (!iris_bo_madvise(bo, I915_MADV_WILLNEED)) {
            bo_free(bo);
            iris_bo_cache_purge_bucket(bufmgr, bucket);
            goto retry;
         }

         if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
            bo_free(bo);
            goto retry;
         }

         if (zeroed) {
            void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
            if (!map) {
               bo_free(bo);
               goto retry;
            }
            memset(map, 0, bo_size);
         }
      }
   }

   if (alloc_from_cache) {
      /* If the cached BO isn't in the right memory zone, free the old
       * memory and assign it a new address.
       */
      if (memzone != iris_memzone_for_address(bo->gtt_offset)) {
         vma_free(bufmgr, bo->gtt_offset, bo->size);
         bo->gtt_offset = 0ull;
      }
   } else {
skip_cache:
      bo = bo_calloc();
      if (!bo)
         goto err;

      bo->size = bo_size;
      bo->idle = true;

      struct drm_i915_gem_create create = { .size = bo_size };

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
      if (ret != 0) {
         free(bo);
         goto err;
      }

      bo->gem_handle = create.handle;
      bo->bufmgr = bufmgr;

      bo->tiling_mode = I915_TILING_NONE;
      bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
      bo->stride = 0;

      if (bo_set_tiling_internal(bo, tiling_mode, stride))
         goto err_free;

      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      struct drm_i915_gem_set_domain sd = {
         .handle = bo->gem_handle,
         .read_domains = I915_GEM_DOMAIN_CPU,
         .write_domain = 0,
      };

      if (drm_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0)
         goto err_free;
   }

   bo->name = name;
   p_atomic_set(&bo->refcount, 1);
   bo->reusable = bucket && bufmgr->bo_reuse;
   bo->cache_coherent = bufmgr->has_llc;
   bo->index = -1;
   bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;

   /* By default, capture all driver-internal buffers like shader kernels,
    * surface states, dynamic states, border colors, and so on.
    */
   if (memzone < IRIS_MEMZONE_OTHER)
      bo->kflags |= EXEC_OBJECT_CAPTURE;

   if (bo->gtt_offset == 0ull) {
      bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1);

      if (bo->gtt_offset == 0ull)
         goto err_free;
   }

   mtx_unlock(&bufmgr->lock);

   if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) {
      struct drm_i915_gem_caching arg = {
         .handle = bo->gem_handle,
         .caching = 1,
      };
      if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0) {
         bo->cache_coherent = true;
         bo->reusable = false;
      }
   }

   DBG("bo_create: buf %d (%s) (%s memzone) %llub\n", bo->gem_handle,
       bo->name, memzone_name(memzone), (unsigned long long) size);

   return bo;

err_free:
   bo_free(bo);
err:
   mtx_unlock(&bufmgr->lock);
   return NULL;
}

struct iris_bo *
iris_bo_alloc(struct iris_bufmgr *bufmgr,
              const char *name,
              uint64_t size,
              enum iris_memory_zone memzone)
{
   return bo_alloc_internal(bufmgr, name, size, memzone,
                            0, I915_TILING_NONE, 0);
}

struct iris_bo *
iris_bo_alloc_tiled(struct iris_bufmgr *bufmgr, const char *name,
                    uint64_t size, enum iris_memory_zone memzone,
                    uint32_t tiling_mode, uint32_t pitch, unsigned flags)
{
   return bo_alloc_internal(bufmgr, name, size, memzone,
                            flags, tiling_mode, pitch);
}

struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone)
{
   struct iris_bo *bo;

   bo = bo_calloc();
   if (!bo)
      return NULL;

   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
   };
   if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
      goto err_free;
   bo->gem_handle = arg.handle;

   /* Check the buffer for validity before we try and use it in a batch */
   struct drm_i915_gem_set_domain sd = {
      .handle = bo->gem_handle,
      .read_domains = I915_GEM_DOMAIN_CPU,
   };
   if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
      goto err_close;

   bo->name = name;
   bo->size = size;
   bo->map_cpu = ptr;

   bo->bufmgr = bufmgr;
   bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
   bo->gtt_offset = vma_alloc(bufmgr, memzone, size, 1);
   if (bo->gtt_offset == 0ull)
      goto err_close;

   p_atomic_set(&bo->refcount, 1);
   bo->userptr = true;
   bo->cache_coherent = true;
   bo->index = -1;
   bo->idle = true;

   return bo;

err_close:
   drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &bo->gem_handle);
err_free:
   free(bo);
   return NULL;
}

/**
 * Returns an iris_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
struct iris_bo *
iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                             const char *name, unsigned int handle)
{
   struct iris_bo *bo;

   /* At the moment most applications only have a few named BOs.
    * For instance, in a DRI client only the render buffers passed
    * between X and the client are named.  And since X returns the
    * alternating names for the front/back buffer a linear search
    * provides a sufficiently fast match.
    */
   mtx_lock(&bufmgr->lock);
   bo = hash_find_bo(bufmgr->name_table, handle);
   if (bo) {
      iris_bo_reference(bo);
      goto out;
   }

   struct drm_gem_open open_arg = { .name = handle };
   int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
   if (ret != 0) {
      DBG("Couldn't reference %s handle 0x%08x: %s\n",
          name, handle, strerror(errno));
      bo = NULL;
      goto out;
   }
   /* Now see if someone has used a prime handle to get this
    * object from the kernel before by looking through the list
    * again for a matching gem_handle
    */
   bo = hash_find_bo(bufmgr->handle_table, open_arg.handle);
   if (bo) {
      iris_bo_reference(bo);
      goto out;
   }

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   bo->size = open_arg.size;
   bo->gtt_offset = 0;
   bo->bufmgr = bufmgr;
   bo->gem_handle = open_arg.handle;
   bo->name = name;
   bo->global_name = handle;
   bo->reusable = false;
   bo->external = true;
   bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
   bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);

   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
   _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);

   struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
   ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
   if (ret != 0)
      goto err_unref;

   bo->tiling_mode = get_tiling.tiling_mode;
   bo->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */
   DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);

out:
   mtx_unlock(&bufmgr->lock);
   return bo;

err_unref:
   bo_free(bo);
   mtx_unlock(&bufmgr->lock);
   return NULL;
}

static void
bo_free(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   if (bo->map_cpu && !bo->userptr) {
      VG_NOACCESS(bo->map_cpu, bo->size);
      munmap(bo->map_cpu, bo->size);
   }
   if (bo->map_wc) {
      VG_NOACCESS(bo->map_wc, bo->size);
      munmap(bo->map_wc, bo->size);
   }
   if (bo->map_gtt) {
      VG_NOACCESS(bo->map_gtt, bo->size);
      munmap(bo->map_gtt, bo->size);
   }

   if (bo->external) {
      struct hash_entry *entry;

      if (bo->global_name) {
         entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
         _mesa_hash_table_remove(bufmgr->name_table, entry);
      }

      entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
      _mesa_hash_table_remove(bufmgr->handle_table, entry);
   }

   /* Close this object */
   struct drm_gem_close close = { .handle = bo->gem_handle };
   int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
   if (ret != 0) {
      DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
          bo->gem_handle, bo->name, strerror(errno));
   }

   vma_free(bo->bufmgr, bo->gtt_offset, bo->size);

   free(bo);
}

/** Frees all cached buffers significantly older than @time. */
static void
cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
{
   int i;

   if (bufmgr->time == time)
      return;

   for (i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
         if (time - bo->free_time <= 1)
            break;

         list_del(&bo->head);

         bo_free(bo);
      }
   }

   bufmgr->time = time;
}

static void
bo_unreference_final(struct iris_bo *bo, time_t time)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   struct bo_cache_bucket *bucket;

   DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);

   bucket = NULL;
   if (bo->reusable)
      bucket = bucket_for_size(bufmgr, bo->size);
   /* Put the buffer into our internal cache for reuse if we can. */
   if (bucket && iris_bo_madvise(bo, I915_MADV_DONTNEED)) {
      bo->free_time = time;
      bo->name = NULL;

      list_addtail(&bo->head, &bucket->head);
   } else {
      bo_free(bo);
   }
}

void
iris_bo_unreference(struct iris_bo *bo)
{
   if (bo == NULL)
      return;

   assert(p_atomic_read(&bo->refcount) > 0);

   if (atomic_add_unless(&bo->refcount, -1, 1)) {
      struct iris_bufmgr *bufmgr = bo->bufmgr;
      struct timespec time;

      clock_gettime(CLOCK_MONOTONIC, &time);

      mtx_lock(&bufmgr->lock);

      if (p_atomic_dec_zero(&bo->refcount)) {
         bo_unreference_final(bo, time.tv_sec);
         cleanup_bo_cache(bufmgr, time.tv_sec);
      }

      mtx_unlock(&bufmgr->lock);
   }
}

static void
bo_wait_with_stall_warning(struct pipe_debug_callback *dbg,
                           struct iris_bo *bo,
                           const char *action)
{
   bool busy = dbg && !bo->idle;
   double elapsed = unlikely(busy) ? -get_time() : 0.0;

   iris_bo_wait_rendering(bo);

   if (unlikely(busy)) {
      elapsed += get_time();
      if (elapsed > 1e-5) /* 0.01ms */
         perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
                    action, bo->name, elapsed * 1000);
   }
}

static void
print_flags(unsigned flags)
{
   if (flags & MAP_READ)
      DBG("READ ");
   if (flags & MAP_WRITE)
      DBG("WRITE ");
   if (flags & MAP_ASYNC)
      DBG("ASYNC ");
   if (flags & MAP_PERSISTENT)
      DBG("PERSISTENT ");
   if (flags & MAP_COHERENT)
      DBG("COHERENT ");
   if (flags & MAP_RAW)
      DBG("RAW ");
   DBG("\n");
}

static void *
iris_bo_map_cpu(struct pipe_debug_callback *dbg,
                struct iris_bo *bo, unsigned flags)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We disallow CPU maps for writing to non-coherent buffers, as the
    * CPU map can become invalidated when a batch is flushed out, which
    * can happen at unpredictable times.  You should use WC maps instead.
    */
   assert(bo->cache_coherent || !(flags & MAP_WRITE));

   if (!bo->map_cpu) {
      DBG("iris_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);

      struct drm_i915_gem_mmap mmap_arg = {
         .handle = bo->gem_handle,
         .size = bo->size,
      };
      int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
      if (ret != 0) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }
      void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         munmap(map, bo->size);
      }
   }
   assert(bo->map_cpu);

   DBG("iris_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name,
       bo->map_cpu);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(dbg, bo, "CPU mapping");
   }

   if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
      /* If we're reusing an existing CPU mapping, the CPU caches may
       * contain stale data from the last time we read from that mapping.
       * (With the BO cache, it might even be data from a previous buffer!)
       * Even if it's a brand new mapping, the kernel may have zeroed the
       * buffer via CPU writes.
       *
       * We need to invalidate those cachelines so that we see the latest
       * contents, and so long as we only read from the CPU mmap we do not
       * need to write those cachelines back afterwards.
       *
       * On LLC, the empirical evidence suggests that writes from the GPU
       * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
       * cachelines.  (Other reads, such as the display engine, bypass the
       * LLC entirely requiring us to keep dirty pixels for the scanout
       * out of any cache.)
       */
      gen_invalidate_range(bo->map_cpu, bo->size);
   }

   return bo->map_cpu;
}

static void *
iris_bo_map_wc(struct pipe_debug_callback *dbg,
               struct iris_bo *bo, unsigned flags)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   if (!bo->map_wc) {
      DBG("iris_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);

      struct drm_i915_gem_mmap mmap_arg = {
         .handle = bo->gem_handle,
         .size = bo->size,
         .flags = I915_MMAP_WC,
      };
      int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
      if (ret != 0) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }

      void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         munmap(map, bo->size);
      }
   }
   assert(bo->map_wc);

   DBG("iris_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(dbg, bo, "WC mapping");
   }

   return bo->map_wc;
}

/**
 * Perform an uncached mapping via the GTT.
 *
 * Write access through the GTT is not quite fully coherent.  On low power
 * systems especially, like modern Atoms, we can observe reads from RAM before
 * the write via GTT has landed.  A write memory barrier that flushes the Write
 * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
 * read after the write as the GTT write suffers a small delay through the GTT
 * indirection.  The kernel uses an uncached mmio read to ensure the GTT write
 * is ordered with reads (either by the GPU, WB or WC) and unconditionally
 * flushes prior to execbuf submission.  However, if we are not informing the
 * kernel about our GTT writes, it will not flush before earlier access, such
 * as when using the cmdparser.  Similarly, we need to be careful if we should
 * ever issue a CPU read immediately following a GTT write.
 *
 * Telling the kernel about write access also has one more important
 * side-effect.  Upon receiving notification about the write, it cancels any
 * scanout buffering for FBC/PSR and friends.  Later FBC/PSR is then flushed by
 * either SW_FINISH or DIRTYFB.  The presumption is that we never write to the
 * actual scanout via a mmapping, only to a backbuffer and so all the FBC/PSR
 * tracking is handled on the buffer exchange instead.
 */
static void *
iris_bo_map_gtt(struct pipe_debug_callback *dbg,
                struct iris_bo *bo, unsigned flags)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* Get a mapping of the buffer if we haven't before. */
   if (bo->map_gtt == NULL) {
      DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);

      struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };

      /* Get the fake offset back... */
      int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
      if (ret != 0) {
         DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }

      /* and mmap it. */
      void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       bufmgr->fd, mmap_arg.offset);
      if (map == MAP_FAILED) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }

      /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
       * already intercept this mmap call.  However, for consistency between
       * all the mmap paths, we mark the pointer as defined now and mark it
       * as inaccessible afterwards.
       */
      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         munmap(map, bo->size);
      }
   }
   assert(bo->map_gtt);

   DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(dbg, bo, "GTT mapping");
   }

   return bo->map_gtt;
}

static bool
can_map_cpu(struct iris_bo *bo, unsigned flags)
{
   if (bo->cache_coherent)
      return true;

   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
    * an LLC platform reads always are coherent (as they are performed via the
    * central system agent).  It is just the writes that we need to take
    * special care to ensure that land in main memory and not stick in the
    * CPU cache.
    */
   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
      return true;

   /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
    * across batch flushes where the kernel will change cache domains of the
    * bo, invalidating continued access to the CPU mmap on non-LLC device.
    *
    * Similarly, ASYNC typically means that the buffer will be accessed via
    * both the CPU and the GPU simultaneously.  Batches may be executed that
    * use the BO even while it is mapped.  While OpenGL technically disallows
    * most drawing while non-persistent mappings are active, we may still use
    * the GPU for blits or other operations, causing batches to happen at
    * inconvenient times.
    *
    * If RAW is set, we expect the caller to be able to handle a WC buffer
    * more efficiently than the involuntary clflushes.
    */
   if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC | MAP_RAW))
      return false;

   return !(flags & MAP_WRITE);
}

void *
iris_bo_map(struct pipe_debug_callback *dbg,
            struct iris_bo *bo, unsigned flags)
{
   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
      return iris_bo_map_gtt(dbg, bo, flags);

   void *map;

   if (can_map_cpu(bo, flags))
      map = iris_bo_map_cpu(dbg, bo, flags);
   else
      map = iris_bo_map_wc(dbg, bo, flags);

   /* Allow the attempt to fail by falling back to the GTT where necessary.
    *
    * Not every buffer can be mmaped directly using the CPU (or WC), for
    * example buffers that wrap stolen memory or are imported from other
    * devices.  For those, we have little choice but to use a GTT mmapping.
    * However, if we use a slow GTT mmapping for reads where we expected fast
    * access, that order of magnitude difference in throughput will be clearly
    * expressed by angry users.
    *
    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
    */
   if (!map && !(flags & MAP_RAW)) {
      perf_debug(dbg, "Fallback GTT mapping for %s with access flags %x\n",
                 bo->name, flags);
      map = iris_bo_map_gtt(dbg, bo, flags);
   }

   return map;
}

/** Waits for all GPU rendering with the object to have completed. */
void
iris_bo_wait_rendering(struct iris_bo *bo)
{
   /* We require a kernel recent enough for WAIT_IOCTL support.
    * See intel_init_bufmgr()
    */
   iris_bo_wait(bo, -1);
}

/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time.  Otherwise some negative
 * return value describes the error.  Of particular interest is -ETIME when
 * the wait has failed to yield the desired result.
 *
 * Similar to iris_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time.  Another subtle
 * difference is the internal locking semantics are different (this variant
 * does not hold the lock for the duration of the wait).  This makes the wait
 * subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call.  The wait will
 * not guarantee that the buffer is re-issued via another thread, or a flinked
 * handle.  Userspace must make sure this race does not occur if such precision
 * is important.
 *
 * Note that some kernels have broken the infinite wait for negative values
 * promise; upgrade to the latest stable kernels if this is the case.
 */
int
iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* If we know it's idle, don't bother with the kernel round trip */
   if (bo->idle && !bo->external)
      return 0;

   struct drm_i915_gem_wait wait = {
      .bo_handle = bo->gem_handle,
      .timeout_ns = timeout_ns,
   };
   int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
   if (ret != 0)
      return -errno;

   bo->idle = true;

   return ret;
}

void
iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
{
   mtx_destroy(&bufmgr->lock);

   /* Free any cached buffer objects we were going to reuse */
   for (int i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
         list_del(&bo->head);

         bo_free(bo);
      }
   }

   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);

   for (int z = 0; z < IRIS_MEMZONE_COUNT; z++) {
      if (z != IRIS_MEMZONE_BINDER)
         util_vma_heap_finish(&bufmgr->vma_allocator[z]);
   }

   free(bufmgr);
}

static int
bo_set_tiling_internal(struct iris_bo *bo, uint32_t tiling_mode,
                       uint32_t stride)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_set_tiling set_tiling;
   int ret;

   if (bo->global_name == 0 &&
       tiling_mode == bo->tiling_mode && stride == bo->stride)
      return 0;

   memset(&set_tiling, 0, sizeof(set_tiling));
   do {
      /* set_tiling is slightly broken and overwrites the
       * input on the error path, so we have to open code
       * drm_ioctl.
       */
      set_tiling.handle = bo->gem_handle;
      set_tiling.tiling_mode = tiling_mode;
      set_tiling.stride = stride;

      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
   if (ret == -1)
      return -errno;

   bo->tiling_mode = set_tiling.tiling_mode;
   bo->swizzle_mode = set_tiling.swizzle_mode;
   bo->stride = set_tiling.stride;
   return 0;
}

int
iris_bo_get_tiling(struct iris_bo *bo, uint32_t *tiling_mode,
                   uint32_t *swizzle_mode)
{
   *tiling_mode = bo->tiling_mode;
   *swizzle_mode = bo->swizzle_mode;
   return 0;
}

struct iris_bo *
iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd)
{
   uint32_t handle;
   struct iris_bo *bo;

   mtx_lock(&bufmgr->lock);
   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
          strerror(errno));
      mtx_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us. Just as
    * for named buffers, we must not create two bo's pointing at the same
    * kernel object
    */
   bo = hash_find_bo(bufmgr->handle_table, handle);
   if (bo) {
      iris_bo_reference(bo);
      goto out;
   }

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   /* Determine size of bo.  The fd-to-handle ioctl really should
    * return the size, but it doesn't.  If we have kernel 3.12 or
    * later, we can lseek on the prime fd to get the size.  Older
    * kernels will just fail, in which case we fall back to the
    * provided (estimated or guess size). */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;

   bo->bufmgr = bufmgr;
   bo->gem_handle = handle;
   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

   bo->name = "prime";
   bo->reusable = false;
   bo->external = true;
   bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
   bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);

   struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
   if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
      goto err;

   bo->tiling_mode = get_tiling.tiling_mode;
   bo->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */

out:
   mtx_unlock(&bufmgr->lock);
   return bo;

err:
   bo_free(bo);
   mtx_unlock(&bufmgr->lock);
   return NULL;
}

static void
iris_bo_make_external_locked(struct iris_bo *bo)
{
   if (!bo->external) {
      _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo);
      bo->external = true;
   }
}

static void
iris_bo_make_external(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   if (bo->external)
      return;

   mtx_lock(&bufmgr->lock);
   iris_bo_make_external_locked(bo);
   mtx_unlock(&bufmgr->lock);
}

int
iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   iris_bo_make_external(bo);

   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                          DRM_CLOEXEC, prime_fd) != 0)
      return -errno;

   bo->reusable = false;

   return 0;
}

uint32_t
iris_bo_export_gem_handle(struct iris_bo *bo)
{
   iris_bo_make_external(bo);

   return bo->gem_handle;
}

int
iris_bo_flink(struct iris_bo *bo, uint32_t *name)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   if (!bo->global_name) {
      struct drm_gem_flink flink = { .handle = bo->gem_handle };

      if (drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
         return -errno;

      mtx_lock(&bufmgr->lock);
      if (!bo->global_name) {
         iris_bo_make_external_locked(bo);
         bo->global_name = flink.name;
         _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
      }
      mtx_unlock(&bufmgr->lock);

      bo->reusable = false;
   }

   *name = bo->global_name;
   return 0;
}
static void
add_bucket(struct iris_bufmgr *bufmgr, int size)
{
   unsigned int i = bufmgr->num_buckets;

   assert(i < ARRAY_SIZE(bufmgr->cache_bucket));

   list_inithead(&bufmgr->cache_bucket[i].head);
   bufmgr->cache_bucket[i].size = size;
   bufmgr->num_buckets++;

   assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
   assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
   assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
}

static void
init_cache_buckets(struct iris_bufmgr *bufmgr)
{
   uint64_t size, cache_max_size = 64 * 1024 * 1024;

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough.  (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   add_bucket(bufmgr, PAGE_SIZE);
   add_bucket(bufmgr, PAGE_SIZE * 2);
   add_bucket(bufmgr, PAGE_SIZE * 3);

   /* Initialize the linked lists for BO reuse cache. */
   for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
      add_bucket(bufmgr, size);
      add_bucket(bufmgr, size + size * 1 / 4);
      add_bucket(bufmgr, size + size * 2 / 4);
      add_bucket(bufmgr, size + size * 3 / 4);
   }
}
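/* bucket_for_size() is referenced throughout this excerpt but not shown.
 * Below is a minimal sketch consistent with add_bucket()'s asserts (each
 * bucket serves every request up to and including its own size), assuming a
 * linear scan over the bucket array, which add_bucket() fills in ascending
 * order.  The real Mesa implementation may compute the index arithmetically;
 * this is an illustration, not the actual code. */
static struct bo_cache_bucket *
bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size)
{
   for (int i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      /* Return the smallest bucket that can hold the request; matches
       * the asserts: size and size - 2048 land in the same bucket, while
       * size + 1 spills into the next one. */
      if (bucket->size >= size)
         return bucket;
   }

   /* Larger than the largest bucket: no caching for this size. */
   return NULL;
}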