static bool r600_resource_commit(struct pipe_context *pctx,
				 struct pipe_resource *resource,
				 unsigned level, struct pipe_box *box,
				 bool commit)
{
	struct r600_common_context *ctx = (struct r600_common_context *)pctx;
	struct r600_resource *res = r600_resource(resource);

	/*
	 * Since buffer commitment changes cannot be pipelined, we need to
	 * (a) flush any pending commands that refer to the buffer we're about
	 *     to change, and
	 * (b) wait for threaded submit to finish, including those that were
	 *     triggered by some other, earlier operation.
	 */
	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
					     res->buf, RADEON_USAGE_READWRITE)) {
		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}
	if (radeon_emitted(ctx->dma.cs, 0) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
					     res->buf, RADEON_USAGE_READWRITE)) {
		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}

	ctx->ws->cs_sync_flush(ctx->dma.cs);
	ctx->ws->cs_sync_flush(ctx->gfx.cs);

	assert(resource->target == PIPE_BUFFER);

	return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
}
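
A hedged caller sketch for the commit hook above. u_box_1d is the gallium util/u_box.h helper; the buffer is assumed to have been created as a sparse, commitable resource, and the helper name is made up for illustration.

/* Hypothetical helper: commit or release 1 MiB of backing store at the
 * start of a sparse buffer through the resource_commit hook above. */
static void example_commit_first_mib(struct pipe_context *pctx,
				     struct pipe_resource *buf, bool commit)
{
	struct pipe_box box;

	u_box_1d(0, 1024 * 1024, &box);
	pctx->resource_commit(pctx, buf, 0, &box, commit);
}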
Example #2
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
                                      struct r600_resource *resource,
                                      unsigned usage)
{
	enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
	bool busy = false;

	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
		return ctx->ws->buffer_map(resource->buf, NULL, usage);
	}

	if (!(usage & PIPE_TRANSFER_WRITE)) {
		/* Read-only mappings only have to wait for the last write. */
		rusage = RADEON_USAGE_WRITE;
	}

	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
					     resource->buf, rusage)) {
		if (usage & PIPE_TRANSFER_DONTBLOCK) {
			ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
			return NULL;
		} else {
			ctx->gfx.flush(ctx, 0, NULL);
			busy = true;
		}
	}
	if (radeon_emitted(ctx->dma.cs, 0) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
					     resource->buf, rusage)) {
		if (usage & PIPE_TRANSFER_DONTBLOCK) {
			ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
			return NULL;
		} else {
			ctx->dma.flush(ctx, 0, NULL);
			busy = true;
		}
	}

	if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
		if (usage & PIPE_TRANSFER_DONTBLOCK) {
			return NULL;
		} else {
			/* We will be waiting for the GPU. Wait for any offloaded
			 * CS flush to complete to avoid busy-waiting in the winsys. */
			ctx->ws->cs_sync_flush(ctx->gfx.cs);
			if (ctx->dma.cs)
				ctx->ws->cs_sync_flush(ctx->dma.cs);
		}
	}

	/* Passing NULL as the CS skips the synchronization checks that we
	 * have already done above. */
	return ctx->ws->buffer_map(resource->buf, NULL, usage);
}
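
A hedged usage sketch of the helper above: try a non-blocking read mapping first and fall back to a blocking one. The wrapper name and variables are assumptions, not upstream code.

/* Hypothetical caller: attempt a non-blocking read mapping; if the rings
 * still reference the buffer, fall back to a blocking map. */
static void *example_map_for_read(struct r600_common_context *rctx,
				  struct r600_resource *rbuffer)
{
	void *ptr = r600_buffer_map_sync_with_rings(rctx, rbuffer,
						    PIPE_TRANSFER_READ |
						    PIPE_TRANSFER_DONTBLOCK);
	if (!ptr) {
		/* The buffer is busy; this call flushes and waits. */
		ptr = r600_buffer_map_sync_with_rings(rctx, rbuffer,
						      PIPE_TRANSFER_READ);
	}
	return ptr;
}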
Example #3
/* Make sure there is enough space in the GFX and CE IBs for the next draw; flush if not. */
void si_need_cs_space(struct si_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
	struct radeon_winsys_cs *ce_ib = ctx->ce_ib;
	struct radeon_winsys_cs *dma = ctx->b.dma.cs;

	/* Flush the DMA IB if it's not empty. */
	if (radeon_emitted(dma, 0))
		ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);

	/* There are two memory usage counters in the winsys for all buffers
	 * that have been added (cs_add_buffer) and two counters in the pipe
	 * driver for those that haven't been added yet.
	 */
	if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.gfx.cs,
						       ctx->b.vram, ctx->b.gtt))) {
		ctx->b.gtt = 0;
		ctx->b.vram = 0;
		ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
		return;
	}
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* If the CS is sufficiently large, don't count the space needed
	 * and just flush if there is not enough space left.
	 */
	if (unlikely(cs->cdw > cs->max_dw - 2048 ||
		     (ce_ib && ce_ib->max_dw - ce_ib->cdw <
		      si_ce_needed_cs_space())))
		ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
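
The driver-side vram/gtt counters zeroed above are accumulated whenever a resource is bound before cs_add_buffer has seen it. A minimal sketch of that accounting, with the helper name assumed and the field usage mirroring the other snippets in this collection:

/* Hypothetical accounting helper: estimate the memory a newly bound
 * resource will add to the next IB. The counters are consumed (and reset)
 * by si_need_cs_space above. */
static void example_add_resource_size(struct r600_common_context *rctx,
				      struct r600_resource *res)
{
	if (!res)
		return;

	if (res->domains & RADEON_DOMAIN_VRAM)
		rctx->vram += res->buf->size;
	else if (res->domains & RADEON_DOMAIN_GTT)
		rctx->gtt += res->buf->size;
}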
static void r600_flush_dma_ring(void *ctx, unsigned flags,
				struct pipe_fence_handle **fence)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct radeon_winsys_cs *cs = rctx->dma.cs;
	struct radeon_saved_cs saved;
	bool check_vm =
		(rctx->screen->debug_flags & DBG_CHECK_VM) &&
		rctx->check_vm_faults;

	if (!radeon_emitted(cs, 0)) {
		if (fence)
			rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
		return;
	}

	if (check_vm)
		radeon_save_cs(rctx->ws, cs, &saved, true);

	rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
	if (fence)
		rctx->ws->fence_reference(fence, rctx->last_sdma_fence);

	if (check_vm) {
		/* Use a conservative timeout of 800 ms, after which we won't
		 * wait any longer and assume the GPU is hung.
		 */
		rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);

		rctx->check_vm_faults(rctx, &saved, RING_DMA);
		radeon_clear_saved_cs(&saved);
	}
}
Example #5
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
			boolean count_draw_in, unsigned num_atomics)
{
	/* Flush the DMA IB if it's not empty. */
	if (radeon_emitted(ctx->b.dma.cs, 0))
		ctx->b.dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);

	if (!radeon_cs_memory_below_limit(ctx->b.screen, ctx->b.gfx.cs,
					  ctx->b.vram, ctx->b.gtt)) {
		ctx->b.gtt = 0;
		ctx->b.vram = 0;
		ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
		return;
	}
	/* Everything will be accounted for once the relocations are emitted. */
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* Check available space in CS. */
	if (count_draw_in) {
		uint64_t mask;

		/* The number of dwords all the dirty states would take. */
		mask = ctx->dirty_atoms;
		while (mask != 0)
			num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw;

		/* The upper-bound of how much space a draw command would take. */
		num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
	}

	/* Atomic counters: 8 dwords pre-op + 8 dwords post-op per counter,
	 * plus 16 dwords at the end if any counters are used. */
	num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);

	/* Count in r600_suspend_queries. */
	num_dw += ctx->b.num_cs_dw_queries_suspend;

	/* Count in streamout_end at the end of CS. */
	if (ctx->b.streamout.begin_emitted) {
		num_dw += ctx->b.streamout.num_dw_for_end;
	}

	/* SX_MISC */
	if (ctx->b.chip_class == R600) {
		num_dw += 3;
	}

	/* Count in framebuffer cache flushes at the end of CS. */
	num_dw += R600_MAX_FLUSH_CS_DWORDS;

	/* The fence at the end of CS. */
	num_dw += 10;

	/* Flush if there's not enough space. */
	if (!ctx->b.ws->cs_check_space(ctx->b.gfx.cs, num_dw, false)) {
		ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
	}
}
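
A hedged call-site sketch for the helper above; the extra-dword and atomic-counter values are illustrative placeholders, and the wrapper function is made up.

/* Hypothetical call site: reserve CS space before emitting a draw, letting
 * the helper account for dirty atoms, queries, streamout and the fence. */
static void example_prepare_draw(struct r600_context *rctx)
{
	r600_need_cs_space(rctx, 0 /* no extra dwords */,
			   TRUE /* include the draw upper bound */,
			   0 /* no atomic counters */);
}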
Example #6
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
				     struct pb_buffer *buf,
				     enum radeon_bo_usage usage)
{
	if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
		return true;
	}
	if (radeon_emitted(ctx->dma.cs, 0) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) {
		return true;
	}
	return false;
}
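
A hedged sketch of how a caller typically combines the check above with a non-blocking winsys busy query (zero timeout) to decide whether touching a buffer from the CPU would stall; the helper name is assumed.

/* Hypothetical helper: returns true if accessing the buffer from the CPU
 * right now would have to wait for the GPU. */
static bool example_buffer_is_busy(struct r600_common_context *rctx,
				   struct r600_resource *res)
{
	return r600_rings_is_buffer_referenced(rctx, res->buf,
					       RADEON_USAGE_READWRITE) ||
	       !rctx->ws->buffer_wait(res->buf, 0, RADEON_USAGE_READWRITE);
}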
Example #7
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
                         struct r600_resource *dst, struct r600_resource *src)
{
	uint64_t vram = 0, gtt = 0;

	if (dst) {
		if (dst->domains & RADEON_DOMAIN_VRAM)
			vram += dst->buf->size;
		else if (dst->domains & RADEON_DOMAIN_GTT)
			gtt += dst->buf->size;
	}
	if (src) {
		if (src->domains & RADEON_DOMAIN_VRAM)
			vram += src->buf->size;
		else if (src->domains & RADEON_DOMAIN_GTT)
			gtt += src->buf->size;
	}

	/* Flush the GFX IB if DMA depends on it. */
	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ((dst &&
	      ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
					       RADEON_USAGE_READWRITE)) ||
	     (src &&
	      ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
					       RADEON_USAGE_WRITE))))
		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);

	/* Flush if there's not enough space, or if the memory usage per IB
	 * is too large.
	 */
	if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
	    !ctx->ws->cs_memory_below_limit(ctx->dma.cs, vram, gtt)) {
		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
		assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
	}

	/* If GPUVM is not supported, the CS checker needs 2 entries
	 * in the buffer list per packet, which has to be done manually.
	 */
	if (ctx->screen->info.has_virtual_memory) {
		if (dst)
			radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
						  RADEON_USAGE_WRITE,
						  RADEON_PRIO_SDMA_BUFFER);
		if (src)
			radeon_add_to_buffer_list(ctx, &ctx->dma, src,
						  RADEON_USAGE_READ,
						  RADEON_PRIO_SDMA_BUFFER);
	}
}
Example #8
void r600_context_gfx_flush(void *context, unsigned flags,
			    struct pipe_fence_handle **fence)
{
	struct r600_context *ctx = context;
	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
	struct radeon_winsys *ws = ctx->b.ws;

	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size) &&
	    (!fence || ctx->b.last_gfx_fence)) {
		if (fence)
			ws->fence_reference(fence, ctx->b.last_gfx_fence);
		if (!(flags & RADEON_FLUSH_ASYNC))
			ws->cs_sync_flush(cs);
		return;
	}

	r600_preflush_suspend_features(&ctx->b);

	/* flush the framebuffer cache */
	ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV |
		      R600_CONTEXT_FLUSH_AND_INV_CB |
		      R600_CONTEXT_FLUSH_AND_INV_DB |
		      R600_CONTEXT_FLUSH_AND_INV_CB_META |
		      R600_CONTEXT_FLUSH_AND_INV_DB_META |
		      R600_CONTEXT_WAIT_3D_IDLE |
		      R600_CONTEXT_WAIT_CP_DMA_IDLE;

	r600_flush_emit(ctx);

	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
	if (ctx->b.chip_class == R600) {
		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
	}

	/* Flush the CS. */
	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
	if (fence)
		ws->fence_reference(fence, ctx->b.last_gfx_fence);
	ctx->b.num_gfx_cs_flushes++;

	r600_begin_new_cs(ctx);
}
Example #9
/* This is required to prevent read-after-write hazards. */
void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->dma.cs;

	/* This is called at the end of DMA calls, so increment the counter here. */
	rctx->num_dma_calls++;

	/* IBs using too little memory are limited by the IB submission overhead.
	 * IBs using too much memory are limited by the kernel/TTM overhead.
	 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
	 *
	 * This heuristic makes sure that DMA requests are executed
	 * very soon after the call is made and lowers memory usage.
	 * It improves texture upload performance by keeping the DMA
	 * engine busy while uploads are being submitted.
	 */
	if (rctx->ws->cs_query_memory_usage(rctx->dma.cs) > 64 * 1024 * 1024) {
		rctx->dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
		return;
	}

	r600_need_dma_space(rctx, 1, NULL, NULL);

	if (!radeon_emitted(cs, 0)) /* empty queue */
		return;

	/* NOP waits for idle on Evergreen and later. */
	if (rctx->chip_class >= CIK)
		radeon_emit(cs, 0x00000000); /* NOP */
	else if (rctx->chip_class >= EVERGREEN)
		radeon_emit(cs, 0xf0000000); /* NOP */
	else {
		/* TODO: R600-R700 should use the FENCE packet.
		 * CS checker support is required. */
	}
}
Example #10
void si_context_gfx_flush(void *context, unsigned flags,
			  struct pipe_fence_handle **fence)
{
	struct si_context *ctx = context;
	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
	struct radeon_winsys *ws = ctx->b.ws;

	if (ctx->gfx_flush_in_progress)
		return;

	ctx->gfx_flush_in_progress = true;

	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size) &&
	    (!fence || ctx->last_gfx_fence)) {
		if (fence)
			ws->fence_reference(fence, ctx->last_gfx_fence);
		if (!(flags & RADEON_FLUSH_ASYNC))
			ws->cs_sync_flush(cs);
		ctx->gfx_flush_in_progress = false;
		return;
	}

	r600_preflush_suspend_features(&ctx->b);

	ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
			SI_CONTEXT_PS_PARTIAL_FLUSH;

	/* DRM 3.1.0 doesn't flush TC for VI correctly. */
	if (ctx->b.chip_class == VI && ctx->b.screen->info.drm_minor <= 1)
		ctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2 |
				SI_CONTEXT_INV_VMEM_L1;

	si_emit_cache_flush(ctx, NULL);

	/* Force the kernel to keep the tiling flags. */
	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;

	if (ctx->trace_buf)
		si_trace_emit(ctx);

	if (ctx->is_debug) {
		unsigned i;

		/* Save the IB for debug contexts. */
		free(ctx->last_ib);
		ctx->last_ib_dw_size = cs->cdw;
		ctx->last_ib = malloc(cs->cdw * 4);
		memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
		r600_resource_reference(&ctx->trace_buf, NULL);

		/* Save the buffer list. */
		if (ctx->last_bo_list) {
			for (i = 0; i < ctx->last_bo_count; i++)
				pb_reference(&ctx->last_bo_list[i].buf, NULL);
			free(ctx->last_bo_list);
		}
		ctx->last_bo_count = ws->cs_get_buffer_list(cs, NULL);
		ctx->last_bo_list = calloc(ctx->last_bo_count,
					   sizeof(ctx->last_bo_list[0]));
		ws->cs_get_buffer_list(cs, ctx->last_bo_list);
	}

	/* Flush the CS. */
	ws->cs_flush(cs, flags, &ctx->last_gfx_fence);

	if (fence)
		ws->fence_reference(fence, ctx->last_gfx_fence);

	/* Check VM faults if needed. */
	if (ctx->screen->b.debug_flags & DBG_CHECK_VM)
		si_check_vm_faults(ctx);

	si_begin_new_cs(ctx);
	ctx->gfx_flush_in_progress = false;
}
static void r600_flush_from_st(struct pipe_context *ctx,
			       struct pipe_fence_handle **fence,
			       unsigned flags)
{
	struct pipe_screen *screen = ctx->screen;
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct radeon_winsys *ws = rctx->ws;
	struct pipe_fence_handle *gfx_fence = NULL;
	struct pipe_fence_handle *sdma_fence = NULL;
	bool deferred_fence = false;
	unsigned rflags = RADEON_FLUSH_ASYNC;

	if (flags & PIPE_FLUSH_END_OF_FRAME)
		rflags |= RADEON_FLUSH_END_OF_FRAME;

	/* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
	if (rctx->dma.cs)
		rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);

	if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
		if (fence)
			ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
		if (!(flags & PIPE_FLUSH_DEFERRED))
			ws->cs_sync_flush(rctx->gfx.cs);
	} else {
		/* Instead of flushing, create a deferred fence. Constraints:
		 * - The state tracker must allow a deferred flush.
		 * - The state tracker must request a fence.
		 * Thread safety in fence_finish must be ensured by the state tracker.
		 */
		if (flags & PIPE_FLUSH_DEFERRED && fence) {
			gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
			deferred_fence = true;
		} else {
			rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
		}
	}

	/* Both engines can signal out of order, so we need to keep both fences. */
	if (fence) {
		struct r600_multi_fence *multi_fence =
			CALLOC_STRUCT(r600_multi_fence);
		if (!multi_fence) {
			ws->fence_reference(&sdma_fence, NULL);
			ws->fence_reference(&gfx_fence, NULL);
			goto finish;
		}

		multi_fence->reference.count = 1;
		/* If both fences are NULL, fence_finish will always return true. */
		multi_fence->gfx = gfx_fence;
		multi_fence->sdma = sdma_fence;

		if (deferred_fence) {
			multi_fence->gfx_unflushed.ctx = rctx;
			multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
		}

		screen->fence_reference(screen, fence, NULL);
		*fence = (struct pipe_fence_handle*)multi_fence;
	}
finish:
	if (!(flags & PIPE_FLUSH_DEFERRED)) {
		if (rctx->dma.cs)
			ws->cs_sync_flush(rctx->dma.cs);
		ws->cs_sync_flush(rctx->gfx.cs);
	}
}
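
The multi-fence built above needs a matching wait path. Below is a hedged sketch of how that wait could be structured (simplified timeout handling, helper name assumed, not the exact upstream signature): wait on the SDMA fence, flush the gfx CS if the deferred fence's IB has still not been submitted, then wait on the gfx fence.

/* Hypothetical wait helper for the multi-fence created in r600_flush_from_st.
 * Field names follow the struct usage above; timeout bookkeeping is
 * simplified. */
static bool example_multi_fence_wait(struct radeon_winsys *ws,
				     struct r600_multi_fence *mfence,
				     uint64_t timeout)
{
	struct r600_common_context *unflushed = mfence->gfx_unflushed.ctx;

	if (mfence->sdma && !ws->fence_wait(ws, mfence->sdma, timeout))
		return false;

	/* If the fence was created deferred and its IB has not been flushed
	 * since (the flush counter still matches), flush it now before waiting. */
	if (unflushed &&
	    mfence->gfx_unflushed.ib_index == unflushed->num_gfx_cs_flushes)
		unflushed->gfx.flush(unflushed,
				     timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);

	return !mfence->gfx || ws->fence_wait(ws, mfence->gfx, timeout);
}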
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
                         struct r600_resource *dst, struct r600_resource *src)
{
	uint64_t vram = ctx->dma.cs->used_vram;
	uint64_t gtt = ctx->dma.cs->used_gart;

	if (dst) {
		vram += dst->vram_usage;
		gtt += dst->gart_usage;
	}
	if (src) {
		vram += src->vram_usage;
		gtt += src->gart_usage;
	}

	/* Flush the GFX IB if DMA depends on it. */
	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ((dst &&
	      ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
					       RADEON_USAGE_READWRITE)) ||
	     (src &&
	      ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
					       RADEON_USAGE_WRITE))))
		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);

	/* Flush if there's not enough space, or if the memory usage per IB
	 * is too large.
	 *
	 * IBs using too little memory are limited by the IB submission overhead.
	 * IBs using too much memory are limited by the kernel/TTM overhead.
	 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
	 *
	 * This heuristic makes sure that DMA requests are executed
	 * very soon after the call is made and lowers memory usage.
	 * It improves texture upload performance by keeping the DMA
	 * engine busy while uploads are being submitted.
	 */
	num_dw++; /* for emit_wait_idle below */
	if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
	    ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
	    !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
		assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
	}

	/* Wait for idle if either buffer has been used in the IB before to
	 * prevent read-after-write hazards.
	 */
	if ((dst &&
	     ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
					      RADEON_USAGE_READWRITE)) ||
	    (src &&
	     ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
					      RADEON_USAGE_WRITE)))
		r600_dma_emit_wait_idle(ctx);

	/* If GPUVM is not supported, the CS checker needs 2 entries
	 * in the buffer list per packet, which has to be done manually.
	 */
	if (ctx->screen->info.has_virtual_memory) {
		if (dst)
			radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
						  RADEON_USAGE_WRITE,
						  RADEON_PRIO_SDMA_BUFFER);
		if (src)
			radeon_add_to_buffer_list(ctx, &ctx->dma, src,
						  RADEON_USAGE_READ,
						  RADEON_PRIO_SDMA_BUFFER);
	}

	/* this function is called before all DMA calls, so increment this. */
	ctx->num_dma_calls++;
}
Example #13
static void compute_emit_cs(struct r600_context *rctx,
			    const struct pipe_grid_info *info)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
	unsigned i;

	/* Make sure the gfx ring is the only active ring by flushing any pending DMA work. */
	if (radeon_emitted(rctx->b.dma.cs, 0)) {
		rctx->b.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &rctx->start_compute_cs_cmd);

	/* emit config state */
	if (rctx->b.chip_class == EVERGREEN)
		r600_emit_atom(rctx, &rctx->config_state.atom);

	rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(rctx);

	/* Emit colorbuffers. */
	/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
	for (i = 0; i < 8 && i < rctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)rctx->framebuffer.state.cbufs[i];
		unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE,
						       RADEON_PRIO_SHADER_RW_BUFFER);

		radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, reloc);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, reloc);
	}
	for (; i < 8 ; i++)
		radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
	for (; i < 12; i++)
		radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));

	/* Set CB_TARGET_MASK. XXX: Use cb_misc_state. */
	radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
					rctx->compute_cb_target_mask);

	/* Emit vertex buffer state */
	rctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(rctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(rctx, &rctx->cs_vertex_buffer_state.atom);

	/* Emit constant buffer state */
	r600_emit_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

	/* Emit sampler state */
	r600_emit_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom);

	/* Emit sampler view (texture resource) state */
	r600_emit_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom);

	/* Emit compute shader state */
	r600_emit_atom(rctx, &rctx->cs_shader_state.atom);

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_dispatch(rctx, info);

	/* XXX: evergreen_flush_emit() hardcodes CP_COHER_SIZE to 0xffffffff. */
	rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
			 R600_CONTEXT_INV_VERTEX_CACHE |
			 R600_CONTEXT_INV_TEX_CACHE;
	r600_flush_emit(rctx);
	rctx->b.flags = 0;

	if (rctx->b.chip_class >= CAYMAN) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
		/* DEALLOC_STATE prevents the GPU from hanging when a
		 * SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT
		 * with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set.
		 */
		radeon_emit(cs, PKT3C(PKT3_DEALLOC_STATE, 0, 0));
		radeon_emit(cs, 0);
	}

#if 0
	COMPUTE_DBG(rctx->screen, "cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG(rctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]);
	}
#endif

}
Example #14
void r600_context_gfx_flush(void *context, unsigned flags,
			    struct pipe_fence_handle **fence)
{
	struct r600_context *ctx = context;
	struct radeon_cmdbuf *cs = ctx->b.gfx.cs;
	struct radeon_winsys *ws = ctx->b.ws;

	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
		return;

	if (r600_check_device_reset(&ctx->b))
		return;

	r600_preflush_suspend_features(&ctx->b);

	/* flush the framebuffer cache */
	ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV |
		      R600_CONTEXT_FLUSH_AND_INV_CB |
		      R600_CONTEXT_FLUSH_AND_INV_DB |
		      R600_CONTEXT_FLUSH_AND_INV_CB_META |
		      R600_CONTEXT_FLUSH_AND_INV_DB_META |
		      R600_CONTEXT_WAIT_3D_IDLE |
		      R600_CONTEXT_WAIT_CP_DMA_IDLE;

	r600_flush_emit(ctx);

	if (ctx->trace_buf)
		eg_trace_emit(ctx);
	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
	if (ctx->b.chip_class == R600) {
		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
	}

	if (ctx->is_debug) {
		/* Save the IB for debug contexts. */
		radeon_clear_saved_cs(&ctx->last_gfx);
		radeon_save_cs(ws, cs, &ctx->last_gfx, true);
		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
		r600_resource_reference(&ctx->trace_buf, NULL);
	}
	/* Flush the CS. */
	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
	if (fence)
		ws->fence_reference(fence, ctx->b.last_gfx_fence);
	ctx->b.num_gfx_cs_flushes++;

	if (ctx->is_debug) {
		if (!ws->fence_wait(ws, ctx->b.last_gfx_fence, 10000000)) {
			const char *fname = getenv("R600_TRACE");
			if (!fname)
				exit(-1);
			FILE *fl = fopen(fname, "w+");
			if (fl) {
				eg_dump_debug_state(&ctx->b.b, fl, 0);
				fclose(fl);
			} else
				perror(fname);
			exit(-1);
		}
	}
	r600_begin_new_cs(ctx);
}