Example #1
void r600_flush_emit(struct r600_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
	unsigned cp_coher_cntl = 0;
	unsigned wait_until = 0;

	if (!rctx->b.flags) {
		return;
	}
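	/* Collect WAIT_UNTIL bits; on Cayman+ these are turned into a PS
	 * partial flush below, while older parts write them to the
	 * WAIT_UNTIL register at the end of this function. */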

	if (rctx->b.flags & R600_CONTEXT_WAIT_3D_IDLE) {
		wait_until |= S_008040_WAIT_3D_IDLE(1);
	}
	if (rctx->b.flags & R600_CONTEXT_WAIT_CP_DMA_IDLE) {
		wait_until |= S_008040_WAIT_CP_DMA_IDLE(1);
	}

	if (wait_until) {
		/* Use of WAIT_UNTIL is deprecated on Cayman+ */
		if (rctx->b.family >= CHIP_CAYMAN) {
			/* emit a PS partial flush on Cayman/TN */
			rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
		}
	}

	if (rctx->b.flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
	}

	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) {
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0);
	}

	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_DB_META)) {
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0);

		/* Set FULL_CACHE_ENA for DB META flushes on r7xx and later.
		 *
		 * This hack predates use of FLUSH_AND_INV_DB_META, so it's
		 * unclear whether it's still needed or even whether it has
		 * any effect.
		 */
		cp_coher_cntl |= S_0085F0_FULL_CACHE_ENA(1);
	}

	if (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV ||
	    (rctx->b.chip_class == R600 && rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH)) {
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
	}

	if (rctx->b.flags & R600_CONTEXT_INV_CONST_CACHE) {
		/* Direct constant addressing uses the shader cache.
	 * Indirect constant addressing uses the vertex cache. */
		cp_coher_cntl |= S_0085F0_SH_ACTION_ENA(1) |
				 (rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1)
							 : S_0085F0_TC_ACTION_ENA(1));
	}
	if (rctx->b.flags & R600_CONTEXT_INV_VERTEX_CACHE) {
		cp_coher_cntl |= rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1)
							: S_0085F0_TC_ACTION_ENA(1);
	}
	if (rctx->b.flags & R600_CONTEXT_INV_TEX_CACHE) {
		/* Textures use the texture cache.
		 * Texture buffer objects use the vertex cache. */
		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1) |
				 (rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1) : 0);
	}

	/* Don't use the DB CP COHER logic on r6xx.
	 * There are hw bugs.
	 */
	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_DB)) {
		cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
				S_0085F0_DB_DEST_BASE_ENA(1) |
				S_0085F0_SMX_ACTION_ENA(1);
	}

	/* Don't use the CB CP COHER logic on r6xx.
	 * There are hw bugs.
	 */
	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_CB)) {
		cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
				S_0085F0_CB0_DEST_BASE_ENA(1) |
				S_0085F0_CB1_DEST_BASE_ENA(1) |
				S_0085F0_CB2_DEST_BASE_ENA(1) |
				S_0085F0_CB3_DEST_BASE_ENA(1) |
				S_0085F0_CB4_DEST_BASE_ENA(1) |
				S_0085F0_CB5_DEST_BASE_ENA(1) |
				S_0085F0_CB6_DEST_BASE_ENA(1) |
				S_0085F0_CB7_DEST_BASE_ENA(1) |
				S_0085F0_SMX_ACTION_ENA(1);
		if (rctx->b.chip_class >= EVERGREEN)
			cp_coher_cntl |= S_0085F0_CB8_DEST_BASE_ENA(1) |
					S_0085F0_CB9_DEST_BASE_ENA(1) |
					S_0085F0_CB10_DEST_BASE_ENA(1) |
					S_0085F0_CB11_DEST_BASE_ENA(1);
	}

	if (rctx->b.chip_class >= R700 &&
	    rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH) {
		cp_coher_cntl |= S_0085F0_SO0_DEST_BASE_ENA(1) |
				S_0085F0_SO1_DEST_BASE_ENA(1) |
				S_0085F0_SO2_DEST_BASE_ENA(1) |
				S_0085F0_SO3_DEST_BASE_ENA(1) |
				S_0085F0_SMX_ACTION_ENA(1);
	}

	/* Workaround for buggy flushing on some R6xx chipsets. */
	if ((rctx->b.flags & (R600_CONTEXT_FLUSH_AND_INV |
			      R600_CONTEXT_STREAMOUT_FLUSH)) &&
	    (rctx->b.family == CHIP_RV670 ||
	     rctx->b.family == CHIP_RS780 ||
	     rctx->b.family == CHIP_RS880)) {
		cp_coher_cntl |=  S_0085F0_CB1_DEST_BASE_ENA(1) |
				  S_0085F0_DEST_BASE_0_ENA(1);
	}

	if (cp_coher_cntl) {
		cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
		cs->buf[cs->cdw++] = cp_coher_cntl;   /* CP_COHER_CNTL */
		cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
		cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
		cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
	}

	if (wait_until) {
		/* Use of WAIT_UNTIL is deprecated on Cayman+ */
		if (rctx->b.family < CHIP_CAYMAN) {
			/* wait for things to settle */
			radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
		}
	}

	/* everything is properly flushed */
	rctx->b.flags = 0;
}
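A minimal calling sketch (an assumption, not part of the example above): callers accumulate flush requests in rctx->b.flags and let r600_flush_emit translate them into packets in one place. The helper name and flag combination below are illustrative only.

/* Hypothetical caller: request a texture-cache invalidation plus a 3D-idle
 * wait, then emit the corresponding packets. Assumes the usual r600 driver
 * headers and an initialized struct r600_context. */
static void example_invalidate_tex_cache(struct r600_context *rctx)
{
	rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
			 R600_CONTEXT_WAIT_3D_IDLE;

	r600_flush_emit(rctx);
}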
Example #2
void
si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
{
	enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
	unsigned cp_coher_cntl = 0;

	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);

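	/* Translate cache-invalidation flags into CP_COHER_CNTL bits for the
	 * SURFACE_SYNC packet emitted at the end of this function. */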
	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_ICACHE)
		cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
		cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
		if (chip_class >= VI)
			cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
	}

	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
		cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
			S_0085F0_CB0_DEST_BASE_ENA(1) |
			S_0085F0_CB1_DEST_BASE_ENA(1) |
			S_0085F0_CB2_DEST_BASE_ENA(1) |
			S_0085F0_CB3_DEST_BASE_ENA(1) |
			S_0085F0_CB4_DEST_BASE_ENA(1) |
			S_0085F0_CB5_DEST_BASE_ENA(1) |
			S_0085F0_CB6_DEST_BASE_ENA(1) |
			S_0085F0_CB7_DEST_BASE_ENA(1);

		/* Necessary for DCC */
		if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI) {
			radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
			radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
			                            EVENT_INDEX(5));
			radeon_emit(cmd_buffer->cs, 0);
			radeon_emit(cmd_buffer->cs, 0);
			radeon_emit(cmd_buffer->cs, 0);
			radeon_emit(cmd_buffer->cs, 0);
		}
	}

	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
		cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
			S_0085F0_DB_DEST_BASE_ENA(1);
	}

	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
	}

	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
	}

	if (!(cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
					      RADV_CMD_FLAG_FLUSH_AND_INV_DB))) {
		if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
			radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
		} else if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
			radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
		}
	}

	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
	}

	/* VGT state sync */
	if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
	}

	/* Make sure ME is idle (it executes most packets) before continuing.
	 * This prevents read-after-write hazards between PFP and ME.
	 */
	if (cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
		radeon_emit(cmd_buffer->cs, 0);
	}

	/* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
	 * Therefore, it should be last. Done in PFP.
	 */
	if (cp_coher_cntl) {
		/* ACQUIRE_MEM is only required on a compute ring. */
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
		radeon_emit(cmd_buffer->cs, cp_coher_cntl);   /* CP_COHER_CNTL */
		radeon_emit(cmd_buffer->cs, 0xffffffff);      /* CP_COHER_SIZE */
		radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE */
		radeon_emit(cmd_buffer->cs, 0x0000000A);      /* POLL_INTERVAL */
	}

	cmd_buffer->state.flush_bits = 0;
}
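A minimal calling sketch (an assumption, not taken from radv itself): callers set bits in cmd_buffer->state.flush_bits and call si_emit_cache_flush once to emit the matching packets. The helper name and flag combination below are illustrative only.

/* Hypothetical caller: flush and invalidate the color buffers (including
 * CB metadata) and wait for pixel shaders before reusing an image.
 * Assumes the radv headers and a live struct radv_cmd_buffer. */
static void example_flush_color_buffers(struct radv_cmd_buffer *cmd_buffer)
{
	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
					RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
					RADV_CMD_FLAG_PS_PARTIAL_FLUSH;

	si_emit_cache_flush(cmd_buffer);
}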