Example #1
static void
intel_glFlush(GLcontext *ctx)
{
   struct intel_context *intel = intel_context(ctx);

   intel_flush(ctx, GL_TRUE);

   /* We're using glFlush as an indicator that a frame is done, which is
    * what DRI2 does before calling SwapBuffers (and means we should catch
    * people doing front-buffer rendering, as well).
    *
    * Wait for the swapbuffers before the one we just emitted, so we don't
    * get too many swaps outstanding for apps that are GPU-heavy but not
    * CPU-heavy.
    *
    * Unfortunately, we don't have a handle to the batch containing the swap,
    * and getting our hands on that doesn't seem worth it, so we just use the
    * first batch we emitted after the last swap.
    */
   if (/* !intel->using_dri2_swapbuffers && */
       intel->first_post_swapbuffers_batch != NULL) {
      drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
      drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
      intel->first_post_swapbuffers_batch = NULL;
   }
}
Example #2
int main(int argc, char **argv)
{
	int fd;
	int object_size = OBJECT_WIDTH * OBJECT_HEIGHT * 4;
	double start_time, end_time;
	drm_intel_bo *dst_bo;
	drm_intel_bufmgr *bufmgr;
	struct intel_batchbuffer *batch;
	int i;

	fd = drm_open_any();

	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);

	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);

	/* Prep loop to get us warmed up. */
	for (i = 0; i < 60; i++) {
		do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
	}
	drm_intel_bo_wait_rendering(dst_bo);

	/* Do the actual timing. */
	start_time = get_time_in_secs();
	for (i = 0; i < 200; i++) {
		do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
	}
	drm_intel_bo_wait_rendering(dst_bo);
	end_time = get_time_in_secs();

	printf("%d iterations in %.03f secs: %.01f MB/sec\n", i,
	       end_time - start_time,
	       (double)i * OBJECT_WIDTH * OBJECT_HEIGHT * 4 / 1024.0 / 1024.0 /
	       (end_time - start_time));

	intel_batchbuffer_free(batch);
	drm_intel_bufmgr_destroy(bufmgr);

	close(fd);

	return 0;
}
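The benchmark above relies on helpers from its harness that are not shown (drm_open_any(), do_render(), get_time_in_secs()). As an assumption purely for illustration, get_time_in_secs() can be built on gettimeofday():

/* Sketch of the timing helper assumed by the benchmark above; requires
 * <sys/time.h> and is not part of the original snippet. */
static double get_time_in_secs(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	return (double)tv.tv_sec + tv.tv_usec / 1000000.0;
}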
Example #3
void
intelFinish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}
Example #4
void
intelFinish(struct gl_context * ctx)
{
   struct intel_context *intel = intel_context(ctx);

   intel_flush(ctx);
   intel_flush_front(ctx);

   if (intel->batch.last_bo)
      drm_intel_bo_wait_rendering(intel->batch.last_bo);
}
Example #5
/* We ignore the user-supplied timeout.  This is weaselly -- we're allowed to
 * round to an implementation-dependent accuracy, and right now our
 * implementation "rounds" to the wait-forever value.
 *
 * The fix would be a new kernel function to do the GTT transition with a
 * timeout.
 */
static void intel_client_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
				 GLbitfield flags, GLuint64 timeout)
{
   struct intel_sync_object *sync = (struct intel_sync_object *)s;

   if (sync->bo) {
      drm_intel_bo_wait_rendering(sync->bo);
      s->StatusFlag = 1;
      drm_intel_bo_unreference(sync->bo);
      sync->bo = NULL;
   }
}
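The timeout limitation described above was later addressed in libdrm by drm_intel_gem_bo_wait(), which Example #10 uses. A minimal sketch of a timeout-honoring variant, assuming that entry point and the same sync-object layout as in the function above:

/* Sketch only: wait at most `timeout` nanoseconds instead of forever.
 * drm_intel_gem_bo_wait() returns 0 once the bo has gone idle. */
static void intel_client_wait_sync_timed(GLcontext *ctx, struct gl_sync_object *s,
				 GLbitfield flags, GLuint64 timeout)
{
   struct intel_sync_object *sync = (struct intel_sync_object *)s;

   if (sync->bo && drm_intel_gem_bo_wait(sync->bo, (int64_t)timeout) == 0) {
      s->StatusFlag = 1;
      drm_intel_bo_unreference(sync->bo);
      sync->bo = NULL;
   }
}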
Example #6
//#define DEBUG
VAStatus
media_sync_surface (MEDIA_DRV_CONTEXT * drv_ctx, VASurfaceID render_target)
{

  struct object_surface *obj_surface = SURFACE (render_target);

  MEDIA_DRV_ASSERT (obj_surface);

  if (obj_surface->bo)
    drm_intel_bo_wait_rendering (obj_surface->bo);

  return VA_STATUS_SUCCESS;
}
Example #7
/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct intel_context *intel)
{
   __DRIcontext *driContext = intel->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
	 intel_update_renderbuffers(driContext, drawable);
      intel_draw_buffer(&intel->ctx);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
	 intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (intel->is_front_buffer_rendering)
      intel->front_buffer_dirty = true;

   /* Wait for the swapbuffers before the one we just emitted, so we
    * don't get too many swaps outstanding for apps that are GPU-heavy
    * but not CPU-heavy.
    *
    * We're using intelDRI2Flush (called from the loader before
    * swapbuffer) and glFlush (for front buffer rendering) as the
    * indicator that a frame is done and then throttle when we get
    * here as we prepare to render the next frame.  At this point the
    * round trips for swap/copy and getting new buffers are done, so
    * we'll spend less time waiting on the GPU.
    *
    * Unfortunately, we don't have a handle to the batch containing
    * the swap, and getting our hands on that doesn't seem worth it,
    * so we just use the first batch we emitted after the last swap.
    */
   if (intel->need_throttle && intel->first_post_swapbuffers_batch) {
      if (!intel->disable_throttling)
         drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
      drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
      intel->first_post_swapbuffers_batch = NULL;
      intel->need_throttle = false;
   }
}
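The doc comment says intel_prepare_render() must run before anything that depends on the current read/draw buffers. A hypothetical call site, with names invented purely for illustration:

/* Illustration only: validate the buffers, then render into them. */
static void example_draw(struct intel_context *intel)
{
   intel_prepare_render(intel);
   /* ... emit state and primitives against the now-current buffers ... */
}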
Example #8
static int
intel_drm_fence_finish(struct intel_winsys *iws,
                       struct pipe_fence_handle *fence)
{
   struct intel_drm_fence *f = (struct intel_drm_fence *)fence;

   /* fence already expired */
   if (!f->bo)
      return 0;

   drm_intel_bo_wait_rendering(f->bo);
   drm_intel_bo_unreference(f->bo);
   f->bo = NULL;

   return 0;
}
Example #9
static void cpu_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
			 struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
			 unsigned logical_tile_no)
{
	assert(batch->ptr == batch->buffer);

	if (options.ducttape)
		drm_intel_bo_wait_rendering(dst->bo);

	if (options.use_cpu_maps) {
		set_to_cpu_domain(src, 0);
		set_to_cpu_domain(dst, 1);
	}

	cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
		 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
		 logical_tile_no);
}
Example #10
int
intel_bo_wait(struct intel_bo *bo, int64_t timeout)
{
   int err;

   if (timeout >= 0) {
      err = drm_intel_gem_bo_wait(gem_bo(bo), timeout);
   } else {
      drm_intel_bo_wait_rendering(gem_bo(bo));
      err = 0;
   }

   /* consider the bo idle on errors */
   if (err && err != -ETIME)
      err = 0;

   return err;
}
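A hypothetical caller (the helper below is an assumption, not part of the winsys API) can use intel_bo_wait() to poll with a zero timeout and only then fall back to an unbounded wait:

/* Sketch: returns true once the bo has gone idle. */
static bool bo_sync(struct intel_bo *bo, bool block)
{
   if (intel_bo_wait(bo, 0) == 0)      /* zero timeout just polls */
      return true;
   if (!block)
      return false;
   return intel_bo_wait(bo, -1) == 0;  /* negative timeout waits forever */
}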
Example #11
static void prw_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
			 struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
			 unsigned logical_tile_no)
{
	uint32_t tmp_tile[options.tile_size*options.tile_size];
	int i;

	assert(batch->ptr == batch->buffer);

	if (options.ducttape)
		drm_intel_bo_wait_rendering(dst->bo);

	if (src->tiling == I915_TILING_NONE) {
		for (i = 0; i < options.tile_size; i++) {
			unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i);
			drm_intel_bo_get_subdata(src->bo, ofs,
						 options.tile_size*sizeof(uint32_t),
						 tmp_tile + options.tile_size*i);
		}
	} else {
		if (options.use_cpu_maps)
			set_to_cpu_domain(src, 0);

		cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
			 tmp_tile, options.tile_size, 0, 0, logical_tile_no);
	}

	if (dst->tiling == I915_TILING_NONE) {
		for (i = 0; i < options.tile_size; i++) {
			unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i);
			drm_intel_bo_subdata(dst->bo, ofs,
					     options.tile_size*sizeof(uint32_t),
					     tmp_tile + options.tile_size*i);
		}
	} else {
		if (options.use_cpu_maps)
			set_to_cpu_domain(dst, 1);

		cpucpy2d(tmp_tile, options.tile_size, 0, 0,
			 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
			 logical_tile_no);
	}
}
Example #12
void intel_batch_submit(ScrnInfoPtr scrn)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	int ret;

	assert (!intel->in_batch_atomic);

	if (intel->vertex_flush)
		intel->vertex_flush(intel);
	intel_end_vertex(intel);

	if (intel->batch_flush)
		intel->batch_flush(intel);

	if (intel->batch_used == 0)
		return;

	/* Mark the end of the batchbuffer. */
	OUT_BATCH(MI_BATCH_BUFFER_END);
	/* Emit a padding dword if we aren't going to be quad-word aligned. */
	if (intel->batch_used & 1)
		OUT_BATCH(MI_NOOP);

	if (DUMP_BATCHBUFFERS) {
	    FILE *file = fopen(DUMP_BATCHBUFFERS, "a");
	    if (file) {
		fwrite (intel->batch_ptr, intel->batch_used*4, 1, file);
		fclose(file);
	    }
	}

	ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr);
	if (ret == 0) {
		ret = drm_intel_bo_mrb_exec(intel->batch_bo,
				intel->batch_used*4,
				NULL, 0, 0xffffffff,
				(HAS_BLT(intel) ?
				 intel->current_batch:
				 I915_EXEC_DEFAULT));
	}

	if (ret != 0) {
		static int once;
		if (!once) {
			if (ret == -EIO) {
				/* The GPU has hung and is unlikely to recover by this point. */
				xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Detected a hung GPU, disabling acceleration.\n");
				xf86DrvMsg(scrn->scrnIndex, X_ERROR, "When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
			} else {
				/* The driver is broken. */
				xf86DrvMsg(scrn->scrnIndex, X_ERROR,
					   "Failed to submit batch buffer, expect rendering corruption: %s.\n ",
					   strerror(-ret));
			}
			uxa_set_force_fallback(xf86ScrnToScreen(scrn), TRUE);
			intel->force_fallback = TRUE;
			once = 1;
		}
	}

	while (!list_is_empty(&intel->batch_pixmaps)) {
		struct intel_pixmap *entry;

		entry = list_first_entry(&intel->batch_pixmaps,
					 struct intel_pixmap,
					 batch);

		entry->busy = -1;
		entry->dirty = 0;
		list_del(&entry->batch);
	}

	if (intel->debug_flush & DEBUG_FLUSH_WAIT)
		drm_intel_bo_wait_rendering(intel->batch_bo);

	intel_next_batch(scrn, intel->current_batch == I915_EXEC_BLT);

	if (intel->batch_commit_notify)
		intel->batch_commit_notify(intel);

	intel->current_batch = 0;
}
Example #13
static void render_timeout(int fd)
{
	drm_intel_bufmgr *bufmgr;
	struct intel_batchbuffer *batch;
	int64_t timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;
	int64_t negative_timeout = -1;
	int ret;
	const bool do_signals = true; /* signals will seem to make the operation
				       * use less process CPU time */
	bool done = false;
	int i, iter = 1;

	igt_skip_on_simulation();

	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

	dst = drm_intel_bo_alloc(bufmgr, "dst", BUF_SIZE, 4096);
	dst2 = drm_intel_bo_alloc(bufmgr, "dst2", BUF_SIZE, 4096);

	igt_skip_on_f(gem_bo_wait_timeout(fd, dst->handle, &timeout) == -EINVAL,
		      "kernel doesn't support wait_timeout, skipping test\n");
	timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;

	/* Figure out a rough number of fills required to consume 1 second of
	 * GPU work.
	 */
	do {
		struct timespec start, end;
		long diff;

#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
#endif

		igt_assert(clock_gettime(CLOCK_MONOTONIC_RAW, &start) == 0);
		for (i = 0; i < iter; i++)
			blt_color_fill(batch, dst, BUF_PAGES);
		intel_batchbuffer_flush(batch);
		drm_intel_bo_wait_rendering(dst);
		igt_assert(clock_gettime(CLOCK_MONOTONIC_RAW, &end) == 0);

		diff = do_time_diff(&end, &start);
		igt_assert(diff >= 0);

		if ((diff / MSEC_PER_SEC) > ENOUGH_WORK_IN_SECONDS)
			done = true;
		else
			iter <<= 1;
	} while (!done && iter < 1000000);

	igt_assert_lt(iter, 1000000);

	igt_info("%d iters is enough work\n", iter);
	gem_quiescent_gpu(fd);
	if (do_signals)
		igt_fork_signal_helper();

	/* We should be able to do half as much work in the same amount of time,
	 * but because we might schedule almost twice as much as required, we
	 * might accidentally time out. Hence add some fudge. */
	for (i = 0; i < iter/3; i++)
		blt_color_fill(batch, dst2, BUF_PAGES);

	intel_batchbuffer_flush(batch);
	igt_assert(gem_bo_busy(fd, dst2->handle) == true);

	igt_assert_eq(gem_bo_wait_timeout(fd, dst2->handle, &timeout), 0);
	igt_assert(gem_bo_busy(fd, dst2->handle) == false);
	igt_assert_neq(timeout, 0);
	if (timeout == (ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC))
		igt_info("Buffer was already done!\n");
	else
		igt_info("Finished with %" PRIu64 " time remaining\n", timeout);

	/* check that polling with timeout=0 works. */
	timeout = 0;
	igt_assert_eq(gem_bo_wait_timeout(fd, dst2->handle, &timeout), 0);
	igt_assert_eq(timeout, 0);

	/* Now check that we correctly time out, twice the auto-tune load should
	 * be good enough. */
	timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;
	for (i = 0; i < iter*2; i++)
		blt_color_fill(batch, dst2, BUF_PAGES);

	intel_batchbuffer_flush(batch);

	ret = gem_bo_wait_timeout(fd, dst2->handle, &timeout);
	igt_assert_eq(ret, -ETIME);
	igt_assert_eq(timeout, 0);
	igt_assert(gem_bo_busy(fd, dst2->handle) == true);

	/* check that polling with timeout=0 works. */
	timeout = 0;
	igt_assert_eq(gem_bo_wait_timeout(fd, dst2->handle, &timeout), -ETIME);
	igt_assert_eq(timeout, 0);


	/* Now check that we can pass negative (infinite) timeouts. */
	negative_timeout = -1;
	for (i = 0; i < iter; i++)
		blt_color_fill(batch, dst2, BUF_PAGES);

	intel_batchbuffer_flush(batch);

	igt_assert_eq(gem_bo_wait_timeout(fd, dst2->handle, &negative_timeout), 0);
	igt_assert_eq(negative_timeout, -1); /* infinity always remains */
	igt_assert(gem_bo_busy(fd, dst2->handle) == false);

	if (do_signals)
		igt_stop_signal_helper();
	drm_intel_bo_unreference(dst2);
	drm_intel_bo_unreference(dst);
	intel_batchbuffer_free(batch);
	drm_intel_bufmgr_destroy(bufmgr);
}