static void
intel_glFlush(GLcontext *ctx)
{
   struct intel_context *intel = intel_context(ctx);

   intel_flush(ctx, GL_TRUE);

   /* We're using glFlush as an indicator that a frame is done, which is
    * what DRI2 does before calling SwapBuffers (and means we should catch
    * people doing front-buffer rendering, as well).
    *
    * Wait for the swapbuffers before the one we just emitted, so we don't
    * get too many swaps outstanding for apps that are GPU-heavy but not
    * CPU-heavy.
    *
    * Unfortunately, we don't have a handle to the batch containing the swap,
    * and getting our hands on that doesn't seem worth it, so we just use the
    * first batch we emitted after the last swap.
    */
   if (/* !intel->using_dri2_swapbuffers && */
       intel->first_post_swapbuffers_batch != NULL) {
      drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
      drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
      intel->first_post_swapbuffers_batch = NULL;
   }
}
int main(int argc, char **argv)
{
	int fd;
	int object_size = OBJECT_WIDTH * OBJECT_HEIGHT * 4;
	double start_time, end_time;
	drm_intel_bo *dst_bo;
	drm_intel_bufmgr *bufmgr;
	struct intel_batchbuffer *batch;
	int i;

	fd = drm_open_any();

	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);

	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);

	/* Prep loop to get us warmed up. */
	for (i = 0; i < 60; i++) {
		do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
	}
	drm_intel_bo_wait_rendering(dst_bo);

	/* Do the actual timing. */
	start_time = get_time_in_secs();
	for (i = 0; i < 200; i++) {
		do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
	}
	drm_intel_bo_wait_rendering(dst_bo);
	end_time = get_time_in_secs();

	printf("%d iterations in %.03f secs: %.01f MB/sec\n",
	       i, end_time - start_time,
	       (double)i * OBJECT_WIDTH * OBJECT_HEIGHT * 4 / 1024.0 / 1024.0 /
	       (end_time - start_time));

	intel_batchbuffer_free(batch);

	drm_intel_bufmgr_destroy(bufmgr);
	close(fd);

	return 0;
}
void
intelFinish(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}
void
intelFinish(struct gl_context *ctx)
{
   struct intel_context *intel = intel_context(ctx);

   intel_flush(ctx);
   intel_flush_front(ctx);

   if (intel->batch.last_bo)
      drm_intel_bo_wait_rendering(intel->batch.last_bo);
}
/* We ignore the user-supplied timeout.  This is weaselly -- we're allowed to
 * round to an implementation-dependent accuracy, and right now our
 * implementation "rounds" to the wait-forever value.
 *
 * The fix would be a new kernel function to do the GTT transition with a
 * timeout.
 */
static void intel_client_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
				   GLbitfield flags, GLuint64 timeout)
{
   struct intel_sync_object *sync = (struct intel_sync_object *)s;

   if (sync->bo) {
      drm_intel_bo_wait_rendering(sync->bo);
      s->StatusFlag = 1;
      drm_intel_bo_unreference(sync->bo);
      sync->bo = NULL;
   }
}
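The comment above notes that honoring the timeout requires a wait call that accepts one. Later libdrm exposes exactly that as drm_intel_gem_bo_wait(), the timed-wait wrapper also used in the intel_bo_wait() example further down. The following is a minimal, hypothetical sketch of a timeout-respecting variant built on it, reusing the types and fields of the snippet above:

/* Hypothetical variant of the wait above that honors the caller's timeout
 * via drm_intel_gem_bo_wait(), which takes a timeout in nanoseconds and
 * returns -ETIME if the bo is still busy when the timeout expires.
 */
static void intel_client_wait_sync_timeout(GLcontext *ctx, struct gl_sync_object *s,
                                           GLbitfield flags, GLuint64 timeout)
{
   struct intel_sync_object *sync = (struct intel_sync_object *)s;

   if (!sync->bo)
      return;

   if (drm_intel_gem_bo_wait(sync->bo, (int64_t)timeout) == 0) {
      s->StatusFlag = 1;
      drm_intel_bo_unreference(sync->bo);
      sync->bo = NULL;
   }
}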
//#define DEBUG
VAStatus
media_sync_surface(MEDIA_DRV_CONTEXT *drv_ctx, VASurfaceID render_target)
{
  struct object_surface *obj_surface = SURFACE(render_target);

  MEDIA_DRV_ASSERT(obj_surface);

  if (obj_surface->bo)
    drm_intel_bo_wait_rendering(obj_surface->bo);

  return VA_STATUS_SUCCESS;
}
/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct intel_context *intel)
{
   __DRIcontext *driContext = intel->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      intel_draw_buffer(&intel->ctx);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (intel->is_front_buffer_rendering)
      intel->front_buffer_dirty = true;

   /* Wait for the swapbuffers before the one we just emitted, so we
    * don't get too many swaps outstanding for apps that are GPU-heavy
    * but not CPU-heavy.
    *
    * We're using intelDRI2Flush (called from the loader before
    * swapbuffer) and glFlush (for front buffer rendering) as the
    * indicator that a frame is done and then throttle when we get
    * here as we prepare to render the next frame.  At this point the
    * round trips for swap/copy and getting new buffers are done and
    * we'll spend less time waiting on the GPU.
    *
    * Unfortunately, we don't have a handle to the batch containing
    * the swap, and getting our hands on that doesn't seem worth it,
    * so we just use the first batch we emitted after the last swap.
    */
   if (intel->need_throttle && intel->first_post_swapbuffers_batch) {
      if (!intel->disable_throttling)
         drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
      drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
      intel->first_post_swapbuffers_batch = NULL;
      intel->need_throttle = false;
   }
}
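The throttle above only works if the batchbuffer code keeps a reference to the first batch submitted after a swap. A minimal sketch of that counterpart, assuming the same intel_context fields (first_post_swapbuffers_batch) and a batch.bo member holding the current batch's buffer object; the exact hook point in the real driver may differ:

/* Hypothetical hook run when a batch is submitted: remember the first batch
 * emitted after the last swap so intel_prepare_render() has something to
 * throttle on.  Field names follow the snippet above.
 */
static void
note_batch_submitted(struct intel_context *intel)
{
   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }
}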
static int
intel_drm_fence_finish(struct intel_winsys *iws,
                       struct pipe_fence_handle *fence)
{
   struct intel_drm_fence *f = (struct intel_drm_fence *)fence;

   /* fence already expired */
   if (!f->bo)
      return 0;

   drm_intel_bo_wait_rendering(f->bo);
   drm_intel_bo_unreference(f->bo);
   f->bo = NULL;

   return 0;
}
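The blocking finish above pairs naturally with a non-blocking "is it signalled yet?" check. A hypothetical sketch assuming the same intel_drm_fence layout, using libdrm's drm_intel_bo_busy(), which returns nonzero while the bo is still busy on the GPU; the return convention here is illustrative only:

/* Hypothetical non-blocking counterpart to the finish above: report whether
 * the fence's bo has retired without waiting.
 */
static int
intel_drm_fence_signalled(struct intel_winsys *iws,
                          struct pipe_fence_handle *fence)
{
   struct intel_drm_fence *f = (struct intel_drm_fence *)fence;

   /* fence already expired */
   if (!f->bo)
      return 0;

   return drm_intel_bo_busy(f->bo) ? -EBUSY : 0;
}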
static void cpu_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
			 struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
			 unsigned logical_tile_no)
{
	assert(batch->ptr == batch->buffer);

	if (options.ducttape)
		drm_intel_bo_wait_rendering(dst->bo);

	if (options.use_cpu_maps) {
		set_to_cpu_domain(src, 0);
		set_to_cpu_domain(dst, 1);
	}

	cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
		 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
		 logical_tile_no);
}
int
intel_bo_wait(struct intel_bo *bo, int64_t timeout)
{
   int err;

   if (timeout >= 0) {
      err = drm_intel_gem_bo_wait(gem_bo(bo), timeout);
   } else {
      drm_intel_bo_wait_rendering(gem_bo(bo));
      err = 0;
   }

   /* consider the bo idle on errors */
   if (err && err != -ETIME)
      err = 0;

   return err;
}
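Given those semantics (a negative timeout falls back to the blocking drm_intel_bo_wait_rendering(), a non-negative one uses the timed wait, and only -ETIME survives as an error), a caller can poll or block as in this small illustrative sketch; the helper names are not from the original driver:

/* Illustrative helpers on top of intel_bo_wait() above: a timeout of 0
 * polls, a negative timeout blocks.  A zero return from intel_bo_wait()
 * means the bo can be treated as idle; -ETIME means it is still busy.
 */
static bool bo_is_idle(struct intel_bo *bo)
{
   return intel_bo_wait(bo, 0) == 0;
}

static void bo_wait_idle(struct intel_bo *bo)
{
   intel_bo_wait(bo, -1);
}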
static void prw_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
			 struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
			 unsigned logical_tile_no)
{
	uint32_t tmp_tile[options.tile_size*options.tile_size];
	int i;

	assert(batch->ptr == batch->buffer);

	if (options.ducttape)
		drm_intel_bo_wait_rendering(dst->bo);

	if (src->tiling == I915_TILING_NONE) {
		for (i = 0; i < options.tile_size; i++) {
			unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i);

			drm_intel_bo_get_subdata(src->bo, ofs,
						 options.tile_size*sizeof(uint32_t),
						 tmp_tile + options.tile_size*i);
		}
	} else {
		if (options.use_cpu_maps)
			set_to_cpu_domain(src, 0);

		cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
			 tmp_tile, options.tile_size, 0, 0, logical_tile_no);
	}

	if (dst->tiling == I915_TILING_NONE) {
		for (i = 0; i < options.tile_size; i++) {
			unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i);

			drm_intel_bo_subdata(dst->bo, ofs,
					     options.tile_size*sizeof(uint32_t),
					     tmp_tile + options.tile_size*i);
		}
	} else {
		if (options.use_cpu_maps)
			set_to_cpu_domain(dst, 1);

		cpucpy2d(tmp_tile, options.tile_size, 0, 0,
			 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
			 logical_tile_no);
	}
}
void intel_batch_submit(ScrnInfoPtr scrn)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	int ret;

	assert(!intel->in_batch_atomic);

	if (intel->vertex_flush)
		intel->vertex_flush(intel);
	intel_end_vertex(intel);

	if (intel->batch_flush)
		intel->batch_flush(intel);

	if (intel->batch_used == 0)
		return;

	/* Mark the end of the batchbuffer. */
	OUT_BATCH(MI_BATCH_BUFFER_END);
	/* Emit a padding dword if we aren't going to be quad-word aligned. */
	if (intel->batch_used & 1)
		OUT_BATCH(MI_NOOP);

	if (DUMP_BATCHBUFFERS) {
		FILE *file = fopen(DUMP_BATCHBUFFERS, "a");
		if (file) {
			fwrite(intel->batch_ptr, intel->batch_used*4, 1, file);
			fclose(file);
		}
	}

	ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr);
	if (ret == 0) {
		ret = drm_intel_bo_mrb_exec(intel->batch_bo,
					    intel->batch_used*4,
					    NULL, 0, 0xffffffff,
					    (HAS_BLT(intel) ?
					     intel->current_batch :
					     I915_EXEC_DEFAULT));
	}

	if (ret != 0) {
		static int once;
		if (!once) {
			if (ret == -EIO) {
				/* The GPU has hung and is unlikely to recover by this point. */
				xf86DrvMsg(scrn->scrnIndex, X_ERROR,
					   "Detected a hung GPU, disabling acceleration.\n");
				xf86DrvMsg(scrn->scrnIndex, X_ERROR,
					   "When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
			} else {
				/* The driver is broken. */
				xf86DrvMsg(scrn->scrnIndex, X_ERROR,
					   "Failed to submit batch buffer, expect rendering corruption: %s.\n",
					   strerror(-ret));
			}
			uxa_set_force_fallback(xf86ScrnToScreen(scrn), TRUE);
			intel->force_fallback = TRUE;
			once = 1;
		}
	}

	while (!list_is_empty(&intel->batch_pixmaps)) {
		struct intel_pixmap *entry;

		entry = list_first_entry(&intel->batch_pixmaps,
					 struct intel_pixmap,
					 batch);

		entry->busy = -1;
		entry->dirty = 0;
		list_del(&entry->batch);
	}

	if (intel->debug_flush & DEBUG_FLUSH_WAIT)
		drm_intel_bo_wait_rendering(intel->batch_bo);

	intel_next_batch(scrn, intel->current_batch == I915_EXEC_BLT);

	if (intel->batch_commit_notify)
		intel->batch_commit_notify(intel);

	intel->current_batch = 0;
}
static void render_timeout(int fd)
{
	drm_intel_bufmgr *bufmgr;
	struct intel_batchbuffer *batch;
	int64_t timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;
	int64_t negative_timeout = -1;
	int ret;
	const bool do_signals = true; /* signals will seem to make the operation
				       * use less process CPU time */
	bool done = false;
	int i, iter = 1;

	igt_skip_on_simulation();

	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

	dst = drm_intel_bo_alloc(bufmgr, "dst", BUF_SIZE, 4096);
	dst2 = drm_intel_bo_alloc(bufmgr, "dst2", BUF_SIZE, 4096);

	igt_skip_on_f(gem_bo_wait_timeout(fd, dst->handle, &timeout) == -EINVAL,
		      "kernel doesn't support wait_timeout, skipping test\n");
	timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;

	/* Figure out a rough number of fills required to consume 1 second of
	 * GPU work.
	 */
	do {
		struct timespec start, end;
		long diff;

#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
#endif

		igt_assert(clock_gettime(CLOCK_MONOTONIC_RAW, &start) == 0);
		for (i = 0; i < iter; i++)
			blt_color_fill(batch, dst, BUF_PAGES);
		intel_batchbuffer_flush(batch);
		drm_intel_bo_wait_rendering(dst);
		igt_assert(clock_gettime(CLOCK_MONOTONIC_RAW, &end) == 0);

		diff = do_time_diff(&end, &start);
		igt_assert(diff >= 0);

		if ((diff / MSEC_PER_SEC) > ENOUGH_WORK_IN_SECONDS)
			done = true;
		else
			iter <<= 1;
	} while (!done && iter < 1000000);

	igt_assert_lt(iter, 1000000);

	igt_info("%d iters is enough work\n", iter);
	gem_quiescent_gpu(fd);
	if (do_signals)
		igt_fork_signal_helper();

	/* We should be able to do half as much work in the same amount of time,
	 * but because we might schedule almost twice as much as required, we
	 * might accidentally time out.  Hence add some fudge. */
	for (i = 0; i < iter/3; i++)
		blt_color_fill(batch, dst2, BUF_PAGES);

	intel_batchbuffer_flush(batch);
	igt_assert(gem_bo_busy(fd, dst2->handle) == true);

	igt_assert_eq(gem_bo_wait_timeout(fd, dst2->handle, &timeout), 0);
	igt_assert(gem_bo_busy(fd, dst2->handle) == false);
	igt_assert_neq(timeout, 0);
	if (timeout == (ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC))
		igt_info("Buffer was already done!\n");
	else
		igt_info("Finished with %" PRIu64 " time remaining\n", timeout);

	/* check that polling with timeout=0 works. */
	timeout = 0;
	igt_assert_eq(gem_bo_wait_timeout(fd, dst2->handle, &timeout), 0);
	igt_assert_eq(timeout, 0);

	/* Now check that we correctly time out, twice the auto-tune load should
	 * be good enough. */
	timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;
	for (i = 0; i < iter*2; i++)
		blt_color_fill(batch, dst2, BUF_PAGES);

	intel_batchbuffer_flush(batch);

	ret = gem_bo_wait_timeout(fd, dst2->handle, &timeout);
	igt_assert_eq(ret, -ETIME);
	igt_assert_eq(timeout, 0);
	igt_assert(gem_bo_busy(fd, dst2->handle) == true);

	/* check that polling with timeout=0 works. */
	timeout = 0;
	igt_assert_eq(gem_bo_wait_timeout(fd, dst2->handle, &timeout), -ETIME);
	igt_assert_eq(timeout, 0);

	/* Now check that we can pass negative (infinite) timeouts. */
	negative_timeout = -1;
	for (i = 0; i < iter; i++)
		blt_color_fill(batch, dst2, BUF_PAGES);

	intel_batchbuffer_flush(batch);

	igt_assert_eq(gem_bo_wait_timeout(fd, dst2->handle, &negative_timeout), 0);
	igt_assert_eq(negative_timeout, -1); /* infinity always remains */
	igt_assert(gem_bo_busy(fd, dst2->handle) == false);

	if (do_signals)
		igt_stop_signal_helper();
	drm_intel_bo_unreference(dst2);
	drm_intel_bo_unreference(dst);
	intel_batchbuffer_free(batch);
	drm_intel_bufmgr_destroy(bufmgr);
}