void r600_flush_emit(struct r600_context *rctx) { struct radeon_winsys_cs *cs = rctx->b.gfx.cs; unsigned cp_coher_cntl = 0; unsigned wait_until = 0; if (!rctx->b.flags) { return; } if (rctx->b.flags & R600_CONTEXT_WAIT_3D_IDLE) { wait_until |= S_008040_WAIT_3D_IDLE(1); } if (rctx->b.flags & R600_CONTEXT_WAIT_CP_DMA_IDLE) { wait_until |= S_008040_WAIT_CP_DMA_IDLE(1); } if (wait_until) { /* Use of WAIT_UNTIL is deprecated on Cayman+ */ if (rctx->b.family >= CHIP_CAYMAN) { /* emit a PS partial flush on Cayman/TN */ rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; } } if (rctx->b.flags & R600_CONTEXT_PS_PARTIAL_FLUSH) { cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); } if (rctx->b.chip_class >= R700 && (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) { cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0); } if (rctx->b.chip_class >= R700 && (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_DB_META)) { cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0); /* Set FULL_CACHE_ENA for DB META flushes on r7xx and later. * * This hack predates use of FLUSH_AND_INV_DB_META, so it's * unclear whether it's still needed or even whether it has * any effect. */ cp_coher_cntl |= S_0085F0_FULL_CACHE_ENA(1); } if (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV || (rctx->b.chip_class == R600 && rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH)) { cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); } if (rctx->b.flags & R600_CONTEXT_INV_CONST_CACHE) { /* Direct constant addressing uses the shader cache. * Indirect contant addressing uses the vertex cache. */ cp_coher_cntl |= S_0085F0_SH_ACTION_ENA(1) | (rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1) : S_0085F0_TC_ACTION_ENA(1)); } if (rctx->b.flags & R600_CONTEXT_INV_VERTEX_CACHE) { cp_coher_cntl |= rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1) : S_0085F0_TC_ACTION_ENA(1); } if (rctx->b.flags & R600_CONTEXT_INV_TEX_CACHE) { /* Textures use the texture cache. * Texture buffer objects use the vertex cache. */ cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1) | (rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1) : 0); } /* Don't use the DB CP COHER logic on r6xx. * There are hw bugs. */ if (rctx->b.chip_class >= R700 && (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_DB)) { cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) | S_0085F0_SMX_ACTION_ENA(1); } /* Don't use the CB CP COHER logic on r6xx. * There are hw bugs. */ if (rctx->b.chip_class >= R700 && (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_CB)) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1) | S_0085F0_SMX_ACTION_ENA(1); if (rctx->b.chip_class >= EVERGREEN) cp_coher_cntl |= S_0085F0_CB8_DEST_BASE_ENA(1) | S_0085F0_CB9_DEST_BASE_ENA(1) | S_0085F0_CB10_DEST_BASE_ENA(1) | S_0085F0_CB11_DEST_BASE_ENA(1); } if (rctx->b.chip_class >= R700 && rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH) { cp_coher_cntl |= S_0085F0_SO0_DEST_BASE_ENA(1) | S_0085F0_SO1_DEST_BASE_ENA(1) | S_0085F0_SO2_DEST_BASE_ENA(1) | S_0085F0_SO3_DEST_BASE_ENA(1) | S_0085F0_SMX_ACTION_ENA(1); } /* Workaround for buggy flushing on some R6xx chipsets. */ if ((rctx->b.flags & (R600_CONTEXT_FLUSH_AND_INV | R600_CONTEXT_STREAMOUT_FLUSH)) && (rctx->b.family == CHIP_RV670 || rctx->b.family == CHIP_RS780 || rctx->b.family == CHIP_RS880)) { cp_coher_cntl |= S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_DEST_BASE_0_ENA(1); } if (cp_coher_cntl) { cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); cs->buf[cs->cdw++] = cp_coher_cntl; /* CP_COHER_CNTL */ cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ } if (wait_until) { /* Use of WAIT_UNTIL is deprecated on Cayman+ */ if (rctx->b.family < CHIP_CAYMAN) { /* wait for things to settle */ radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, wait_until); } } /* everything is properly flushed */ rctx->b.flags = 0; }
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) { enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class; unsigned cp_coher_cntl = 0; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128); if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_ICACHE) cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) { cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); if (chip_class >= VI) cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1); } if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) | EVENT_INDEX(5)); radeon_emit(cmd_buffer->cs, 0); radeon_emit(cmd_buffer->cs, 0); radeon_emit(cmd_buffer->cs, 0); radeon_emit(cmd_buffer->cs, 0); } } if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) { cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1); } if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); } if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); } if (!(cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB))) { if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); } else if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); } } if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); } /* VGT state sync */ if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_VGT_FLUSH) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); } /* Make sure ME is idle (it executes most packets) before continuing. * This prevents read-after-write hazards between PFP and ME. */ if (cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cmd_buffer->cs, 0); } /* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle. * Therefore, it should be last. Done in PFP. */ if (cp_coher_cntl) { /* ACQUIRE_MEM is only required on a compute ring. */ radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0)); radeon_emit(cmd_buffer->cs, cp_coher_cntl); /* CP_COHER_CNTL */ radeon_emit(cmd_buffer->cs, 0xffffffff); /* CP_COHER_SIZE */ radeon_emit(cmd_buffer->cs, 0); /* CP_COHER_BASE */ radeon_emit(cmd_buffer->cs, 0x0000000A); /* POLL_INTERVAL */ } cmd_buffer->state.flush_bits = 0; }