void r600_context_gfx_flush(void *context, unsigned flags,
			    struct pipe_fence_handle **fence)
{
	struct r600_context *ctx = context;
	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;

	if (cs->cdw == ctx->b.initial_gfx_cs_size && !fence)
		return;

	r600_preflush_suspend_features(&ctx->b);

	/* flush the framebuffer cache */
	ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV |
			R600_CONTEXT_FLUSH_AND_INV_CB |
			R600_CONTEXT_FLUSH_AND_INV_DB |
			R600_CONTEXT_FLUSH_AND_INV_CB_META |
			R600_CONTEXT_FLUSH_AND_INV_DB_META |
			R600_CONTEXT_WAIT_3D_IDLE |
			R600_CONTEXT_WAIT_CP_DMA_IDLE;

	r600_flush_emit(ctx);

	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
	if (ctx->b.chip_class == R600) {
		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
	}

	/* force to keep tiling flags */
	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;

	/* Flush the CS. */
	ctx->b.ws->cs_flush(cs, flags, fence);

	r600_begin_new_cs(ctx);
}
void r600_context_gfx_flush(void *context, unsigned flags,
			    struct pipe_fence_handle **fence)
{
	struct r600_context *ctx = context;
	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
	struct radeon_winsys *ws = ctx->b.ws;

	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size) &&
	    (!fence || ctx->b.last_gfx_fence)) {
		if (fence)
			ws->fence_reference(fence, ctx->b.last_gfx_fence);
		if (!(flags & RADEON_FLUSH_ASYNC))
			ws->cs_sync_flush(cs);
		return;
	}

	r600_preflush_suspend_features(&ctx->b);

	/* flush the framebuffer cache */
	ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV |
			R600_CONTEXT_FLUSH_AND_INV_CB |
			R600_CONTEXT_FLUSH_AND_INV_DB |
			R600_CONTEXT_FLUSH_AND_INV_CB_META |
			R600_CONTEXT_FLUSH_AND_INV_DB_META |
			R600_CONTEXT_WAIT_3D_IDLE |
			R600_CONTEXT_WAIT_CP_DMA_IDLE;

	r600_flush_emit(ctx);

	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
	if (ctx->b.chip_class == R600) {
		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
	}

	/* Flush the CS. */
	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
	if (fence)
		ws->fence_reference(fence, ctx->b.last_gfx_fence);
	ctx->b.num_gfx_cs_flushes++;

	r600_begin_new_cs(ctx);
}
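The early-out at the top of the version above is worth reading in isolation: if nothing was added to the command stream since the last flush, the previous fence is handed back instead of submitting an empty IB. Below is a minimal standalone sketch of that pattern in plain C; the types and helpers (cmd_stream, fence, flush) are hypothetical stand-ins, not the radeon_winsys API.

#include <stdio.h>

/* Hypothetical stand-ins for the winsys objects, for illustration only. */
struct fence { int id; };

struct cmd_stream {
	unsigned num_dw;          /* dwords recorded so far            */
	unsigned initial_num_dw;  /* dwords present right after reset  */
	struct fence *last_fence; /* fence of the previous submission  */
};

/* Reuse the previous fence when no new work was recorded; otherwise submit. */
static void flush(struct cmd_stream *cs, struct fence **out_fence)
{
	if (cs->num_dw == cs->initial_num_dw && cs->last_fence) {
		if (out_fence)
			*out_fence = cs->last_fence; /* nothing new: return the old fence */
		return;
	}

	/* ... the real driver would submit cs->num_dw dwords to the kernel here ... */
	static struct fence submitted = {0};
	submitted.id++;
	cs->last_fence = &submitted;
	if (out_fence)
		*out_fence = cs->last_fence;
	cs->num_dw = cs->initial_num_dw; /* start a new, empty stream */
}

int main(void)
{
	struct cmd_stream cs = { .num_dw = 8, .initial_num_dw = 8, .last_fence = NULL };
	struct fence *f = NULL;

	cs.num_dw += 4;  /* record some work */
	flush(&cs, &f);  /* real submission */
	flush(&cs, &f);  /* empty: the previous fence is returned again */
	printf("fence id: %d\n", f ? f->id : -1);
	return 0;
}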
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
			     struct pipe_resource *dst, uint64_t dst_offset,
			     struct pipe_resource *src, uint64_t src_offset,
			     unsigned size)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;

	assert(size);
	assert(rctx->screen->b.has_cp_dma);

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += r600_resource(dst)->gpu_address;
	src_offset += r600_resource(src)->gpu_address;

	/* Flush the caches where the resources are bound. */
	rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
			 R600_CONTEXT_INV_VERTEX_CACHE |
			 R600_CONTEXT_INV_TEX_CACHE |
			 R600_CONTEXT_FLUSH_AND_INV |
			 R600_CONTEXT_FLUSH_AND_INV_CB |
			 R600_CONTEXT_FLUSH_AND_INV_DB |
			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
			 R600_CONTEXT_FLUSH_AND_INV_DB_META |
			 R600_CONTEXT_STREAMOUT_FLUSH |
			 R600_CONTEXT_WAIT_3D_IDLE;

	/* There are differences between R700 and EG in CP DMA,
	 * but we only use the common bits here. */
	while (size) {
		unsigned sync = 0;
		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
		unsigned src_reloc, dst_reloc;

		r600_need_cs_space(rctx,
				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0),
				   FALSE);

		/* Flush the caches for the first copy only. */
		if (rctx->b.flags) {
			r600_flush_emit(rctx);
		}

		/* Do the synchronization after the last copy, so that all data is written to memory. */
		if (size == byte_count) {
			sync = PKT3_CP_DMA_CP_SYNC;
		}

		/* This must be done after r600_need_cs_space. */
		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						      (struct r600_resource*)src,
						      RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						      (struct r600_resource*)dst,
						      RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);

		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
		radeon_emit(cs, src_offset);	/* SRC_ADDR_LO [31:0] */
		radeon_emit(cs, sync | ((src_offset >> 32) & 0xff));	/* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
		radeon_emit(cs, dst_offset);	/* DST_ADDR_LO [31:0] */
		radeon_emit(cs, (dst_offset >> 32) & 0xff);	/* DST_ADDR_HI [7:0] */
		radeon_emit(cs, byte_count);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, src_reloc);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, dst_reloc);

		size -= byte_count;
		src_offset += byte_count;
		dst_offset += byte_count;
	}

	/* Invalidate the read caches. */
	rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
			 R600_CONTEXT_INV_VERTEX_CACHE |
			 R600_CONTEXT_INV_TEX_CACHE;
}
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
			    const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	int i;

	struct r600_resource *onebo = NULL;
	struct r600_pipe_state *cb_state;
	struct evergreen_compute_resource *resources =
					ctx->cs_shader_state.shader->resources;

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);

	ctx->flags |= R600_CONTEXT_CB_FLUSH;
	r600_flush_emit(ctx);

	/* Emit cb_state */
	cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
	r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
				       ctx->compute_cb_target_mask);

	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	for (i = 0; i < get_compute_resource_num(); i++) {
		if (resources[i].enabled) {
			int j;
			COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);

			for (j = 0; j < resources[i].cs_end; j++) {
				if (resources[i].do_reloc[j]) {
					assert(resources[i].bo);
					evergreen_emit_ctx_reloc(ctx,
						resources[i].bo,
						resources[i].usage);
				}

				cs->buf[cs->cdw++] = resources[i].cs[j];
			}

			if (resources[i].bo) {
				onebo = resources[i].bo;
				evergreen_emit_ctx_reloc(ctx,
					resources[i].bo,
					resources[i].usage);

				///special case for textures
				if (resources[i].do_reloc
					[resources[i].cs_end] == 2) {
					evergreen_emit_ctx_reloc(ctx,
						resources[i].bo,
						resources[i].usage);
				}
			}
		}
	}

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */
	ctx->flags |= R600_CONTEXT_CB_FLUSH;
	r600_flush_emit(ctx);

#if 0
	COMPUTE_DBG("cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
	}
#endif

	ctx->ws->cs_flush(ctx->cs, RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE);

	ctx->pm4_dirty_cdwords = 0;
	ctx->flags = 0;

	COMPUTE_DBG("shader started\n");

	ctx->ws->buffer_wait(onebo->buf, 0);

	COMPUTE_DBG("...\n");

	ctx->streamout_start = TRUE;
	ctx->streamout_append_bitmask = ~0;
}
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
			    const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	unsigned flush_flags = 0;
	int i;

	/* make sure that the gfx ring is only one active */
	if (ctx->rings.dma.cs) {
		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE);

		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		r600_write_value(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		r600_write_value(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		r600_write_value(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		r600_write_value(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		r600_write_value(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			r600_write_value(cs, reloc);
		}

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, reloc);
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
				       ctx->compute_cb_target_mask);

	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit constant buffer state */
	r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */
	ctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
	r600_flush_emit(ctx);

#if 0
	COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, ctx->cs->buf[i]);
	}
#endif

	flush_flags = RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE;

	if (ctx->keep_tiling_flags) {
		flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
	}

	ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags, ctx->screen->cs_count++);

	ctx->flags = 0;

	COMPUTE_DBG(ctx->screen, "shader started\n");
}
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
			    const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
	unsigned i;

	/* make sure that the gfx ring is only one active */
	if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
		ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	/* emit config state */
	if (ctx->b.chip_class == EVERGREEN)
		r600_emit_atom(ctx, &ctx->config_state.atom);

	ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
	for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
							   (struct r600_resource*)cb->base.texture,
							   RADEON_USAGE_READWRITE,
							   RADEON_PRIO_SHADER_RW_BUFFER);

		radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_color_dim);	/* R_028C78_CB_COLOR0_DIM */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			radeon_emit(cs, reloc);
		}

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, reloc);
	}
	if (ctx->keep_tiling_flags) {
		for (; i < 8 ; i++) {
			radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
						       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
		}
		for (; i < 12; i++) {
			radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
						       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
		}
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
				       ctx->compute_cb_target_mask);

	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit constant buffer state */
	r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

	/* Emit sampler state */
	r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom);

	/* Emit sampler view (texture resource) state */
	r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */
	ctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
			R600_CONTEXT_INV_VERTEX_CACHE |
			R600_CONTEXT_INV_TEX_CACHE;
	r600_flush_emit(ctx);
	ctx->b.flags = 0;

	if (ctx->b.chip_class >= CAYMAN) {
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4);
		/* DEALLOC_STATE prevents the GPU from hanging when a
		 * SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT
		 * with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set.
		 */
		cs->buf[cs->cdw++] = PKT3C(PKT3_DEALLOC_STATE, 0, 0);
		cs->buf[cs->cdw++] = 0;
	}

#if 0
	COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]);
	}
#endif
}
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
			     struct pipe_resource *dst, uint64_t dst_offset,
			     struct pipe_resource *src, uint64_t src_offset,
			     unsigned size)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;

	assert(size);
	assert(rctx->screen->b.has_cp_dma);

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += r600_resource(dst)->gpu_address;
	src_offset += r600_resource(src)->gpu_address;

	/* Flush the caches where the resources are bound. */
	rctx->b.flags |= r600_get_flush_flags(R600_COHERENCY_SHADER) |
			 R600_CONTEXT_WAIT_3D_IDLE;

	/* There are differences between R700 and EG in CP DMA,
	 * but we only use the common bits here. */
	while (size) {
		unsigned sync = 0;
		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
		unsigned src_reloc, dst_reloc;

		r600_need_cs_space(rctx,
				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
				   3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);

		/* Flush the caches for the first copy only. */
		if (rctx->b.flags) {
			r600_flush_emit(rctx);
		}

		/* Do the synchronization after the last copy, so that all data is written to memory. */
		if (size == byte_count) {
			sync = PKT3_CP_DMA_CP_SYNC;
		}

		/* This must be done after r600_need_cs_space. */
		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						      (struct r600_resource*)src,
						      RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						      (struct r600_resource*)dst,
						      RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);

		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
		radeon_emit(cs, src_offset);	/* SRC_ADDR_LO [31:0] */
		radeon_emit(cs, sync | ((src_offset >> 32) & 0xff));	/* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
		radeon_emit(cs, dst_offset);	/* DST_ADDR_LO [31:0] */
		radeon_emit(cs, (dst_offset >> 32) & 0xff);	/* DST_ADDR_HI [7:0] */
		radeon_emit(cs, byte_count);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, src_reloc);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, dst_reloc);

		size -= byte_count;
		src_offset += byte_count;
		dst_offset += byte_count;
	}

	/* CP_DMA_CP_SYNC doesn't wait for idle on R6xx, but this does. */
	if (rctx->b.chip_class == R600)
		radeon_set_config_reg(cs, R_008040_WAIT_UNTIL,
				      S_008040_WAIT_CP_DMA_IDLE(1));

	/* CP DMA is executed in ME, but index buffers are read by PFP.
	 * This ensures that ME (CP DMA) is idle before PFP starts fetching
	 * indices. If we wanted to execute CP DMA in PFP, this packet
	 * should precede it.
	 */
	r600_emit_pfp_sync_me(rctx);
}
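The chunking loop in both copies of r600_cp_dma_copy_buffer can be read in isolation: a copy larger than CP_DMA_MAX_BYTE_COUNT is split into maximum-sized pieces, and only the final piece requests synchronization, once all data has been written. The sketch below mirrors that control flow in plain, self-contained C. The emit_copy_packet() callback and the MAX_CHUNK value are hypothetical stand-ins for illustration, not the driver's packet encoding.

#include <stdint.h>
#include <stdio.h>

/* Assumption: a CP-DMA-style per-packet byte limit, chosen only for the example. */
#define MAX_CHUNK ((1u << 21) - 8)

/* Hypothetical packet emitter: 'sync' is set only for the last chunk. */
static void emit_copy_packet(uint64_t dst, uint64_t src, unsigned bytes, int sync)
{
	printf("copy %u bytes, src=0x%llx dst=0x%llx sync=%d\n",
	       bytes, (unsigned long long)src, (unsigned long long)dst, sync);
}

static void copy_in_chunks(uint64_t dst, uint64_t src, unsigned size)
{
	while (size) {
		unsigned byte_count = size < MAX_CHUNK ? size : MAX_CHUNK;
		/* Synchronize after the last chunk only, when all data is written. */
		int sync = (size == byte_count);

		emit_copy_packet(dst, src, byte_count, sync);

		size -= byte_count;
		src += byte_count;
		dst += byte_count;
	}
}

int main(void)
{
	copy_in_chunks(0x200000, 0x100000, 5u << 20); /* 5 MiB split into 3 packets */
	return 0;
}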
void r600_context_gfx_flush(void *context, unsigned flags,
			    struct pipe_fence_handle **fence)
{
	struct r600_context *ctx = context;
	struct radeon_cmdbuf *cs = ctx->b.gfx.cs;
	struct radeon_winsys *ws = ctx->b.ws;

	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
		return;

	if (r600_check_device_reset(&ctx->b))
		return;

	r600_preflush_suspend_features(&ctx->b);

	/* flush the framebuffer cache */
	ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV |
			R600_CONTEXT_FLUSH_AND_INV_CB |
			R600_CONTEXT_FLUSH_AND_INV_DB |
			R600_CONTEXT_FLUSH_AND_INV_CB_META |
			R600_CONTEXT_FLUSH_AND_INV_DB_META |
			R600_CONTEXT_WAIT_3D_IDLE |
			R600_CONTEXT_WAIT_CP_DMA_IDLE;

	r600_flush_emit(ctx);

	if (ctx->trace_buf)
		eg_trace_emit(ctx);
	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
	if (ctx->b.chip_class == R600) {
		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
	}

	if (ctx->is_debug) {
		/* Save the IB for debug contexts. */
		radeon_clear_saved_cs(&ctx->last_gfx);
		radeon_save_cs(ws, cs, &ctx->last_gfx, true);
		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
		r600_resource_reference(&ctx->trace_buf, NULL);
	}

	/* Flush the CS. */
	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
	if (fence)
		ws->fence_reference(fence, ctx->b.last_gfx_fence);
	ctx->b.num_gfx_cs_flushes++;

	if (ctx->is_debug) {
		if (!ws->fence_wait(ws, ctx->b.last_gfx_fence, 10000000)) {
			const char *fname = getenv("R600_TRACE");
			if (!fname)
				exit(-1);
			FILE *fl = fopen(fname, "w+");
			if (fl) {
				eg_dump_debug_state(&ctx->b.b, fl, 0);
				fclose(fl);
			} else
				perror(fname);
			exit(-1);
		}
	}

	r600_begin_new_cs(ctx);
}
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
			    const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	unsigned flush_flags = 0;
	int i;
	struct r600_resource *onebo = NULL;
	struct evergreen_compute_resource *resources =
					ctx->cs_shader_state.shader->resources;

	/* make sure that the gfx ring is only one active */
	if (ctx->rings.dma.cs) {
		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE);

		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		r600_write_value(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		r600_write_value(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		r600_write_value(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		r600_write_value(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		r600_write_value(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			r600_write_value(cs, reloc);
		}

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, reloc);
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
				       ctx->compute_cb_target_mask);

	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	for (i = 0; i < get_compute_resource_num(); i++) {
		if (resources[i].enabled) {
			int j;
			COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);

			for (j = 0; j < resources[i].cs_end; j++) {
				if (resources[i].do_reloc[j]) {
					assert(resources[i].bo);
					evergreen_emit_ctx_reloc(ctx,
						resources[i].bo,
						resources[i].usage);
				}

				cs->buf[cs->cdw++] = resources[i].cs[j];
			}

			if (resources[i].bo) {
				onebo = resources[i].bo;
				evergreen_emit_ctx_reloc(ctx,
					resources[i].bo,
					resources[i].usage);

				///special case for textures
				if (resources[i].do_reloc
					[resources[i].cs_end] == 2) {
					evergreen_emit_ctx_reloc(ctx,
						resources[i].bo,
						resources[i].usage);
				}
			}
		}
	}

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */
	ctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
	r600_flush_emit(ctx);

#if 0
	COMPUTE_DBG("cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
	}
#endif

	flush_flags = RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE;

	if (ctx->keep_tiling_flags) {
		flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
	}

	ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags);

	ctx->pm4_dirty_cdwords = 0;
	ctx->flags = 0;

	COMPUTE_DBG("shader started\n");

	ctx->ws->buffer_wait(onebo->buf, 0);

	COMPUTE_DBG("...\n");

	ctx->streamout_start = TRUE;
	ctx->streamout_append_bitmask = ~0;
}