示例#1
0
void r600_begin_new_cs(struct r600_context *ctx)
{
	unsigned shader;

	ctx->b.flags = 0;
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* Begin a new CS. */
	r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);

	/* Re-emit states. */
	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
	r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_PS].atom);
	r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
	ctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	ctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
	r600_mark_atom_dirty(ctx, &ctx->scissor.atom);
	ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
	r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
	if (ctx->b.chip_class <= EVERGREEN) {
		r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
	r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_ES].atom);
	r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
	if (ctx->gs_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_GS].atom);
		r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
	}
	if (ctx->tes_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_HS].atom);
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_LS].atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_VS].atom);
	r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
	r600_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);

	if (ctx->blend_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
	if (ctx->dsa_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
	if (ctx->rasterizer_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);

	if (ctx->b.chip_class <= R700) {
		r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
	}

	ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
	r600_vertex_buffers_dirty(ctx);

	/* Re-emit shader resources. */
	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
		struct r600_constbuf_state *constbuf = &ctx->constbuf_state[shader];
		struct r600_textures_info *samplers = &ctx->samplers[shader];

		constbuf->dirty_mask = constbuf->enabled_mask;
		samplers->views.dirty_mask = samplers->views.enabled_mask;
		samplers->states.dirty_mask = samplers->states.enabled_mask;

		r600_constant_buffers_dirty(ctx, constbuf);
		r600_sampler_views_dirty(ctx, &samplers->views);
		r600_sampler_states_dirty(ctx, &samplers->states);
	}

	r600_postflush_resume_features(&ctx->b);

	/* Re-emit the draw state. */
	ctx->last_primitive_type = -1;
	ctx->last_start_instance = -1;

	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;
}
示例#2
0
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
		const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	unsigned flush_flags = 0;
	int i;

	/* make sure that the gfx ring is only one active */
	if (ctx->rings.dma.cs) {
		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE);

		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		r600_write_value(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		r600_write_value(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		r600_write_value(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		r600_write_value(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		r600_write_value(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			r600_write_value(cs, reloc);
		}

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, reloc);
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
					ctx->compute_cb_target_mask);


	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit constant buffer state */
	r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
	 */
	ctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
	r600_flush_emit(ctx);

#if 0
	COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, ctx->cs->buf[i]);
	}
#endif

	flush_flags = RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE;
	if (ctx->keep_tiling_flags) {
		flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
	}

	ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags, ctx->screen->cs_count++);

	ctx->flags = 0;

	COMPUTE_DBG(ctx->screen, "shader started\n");
}
示例#3
0
void r600_begin_new_cs(struct r600_context *ctx)
{
	unsigned shader;

	if (ctx->is_debug) {
		uint32_t zero = 0;

		/* Create a buffer used for writing trace IDs and initialize it to 0. */
		assert(!ctx->trace_buf);
		ctx->trace_buf = (struct r600_resource*)
			pipe_buffer_create(ctx->b.b.screen, 0,
					   PIPE_USAGE_STAGING, 4);
		if (ctx->trace_buf)
			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
						    0, sizeof(zero), &zero);
		ctx->trace_id = 0;
	}

	if (ctx->trace_buf)
		eg_trace_emit(ctx);

	ctx->b.flags = 0;
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* Begin a new CS. */
	r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);

	/* Re-emit states. */
	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
	r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
	if (ctx->b.chip_class >= EVERGREEN) {
		r600_mark_atom_dirty(ctx, &ctx->fragment_images.atom);
		r600_mark_atom_dirty(ctx, &ctx->fragment_buffers.atom);
		r600_mark_atom_dirty(ctx, &ctx->compute_images.atom);
		r600_mark_atom_dirty(ctx, &ctx->compute_buffers.atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_PS].atom);
	r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
	ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	r600_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
	ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	r600_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
	if (ctx->b.chip_class <= EVERGREEN) {
		r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
	r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_ES].atom);
	r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
	if (ctx->gs_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_GS].atom);
		r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
	}
	if (ctx->tes_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_HS].atom);
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_LS].atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_VS].atom);
	r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
	r600_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);

	if (ctx->blend_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
	if (ctx->dsa_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
	if (ctx->rasterizer_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);

	if (ctx->b.chip_class <= R700) {
		r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
	}

	ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
	r600_vertex_buffers_dirty(ctx);

	/* Re-emit shader resources. */
	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
		struct r600_constbuf_state *constbuf = &ctx->constbuf_state[shader];
		struct r600_textures_info *samplers = &ctx->samplers[shader];

		constbuf->dirty_mask = constbuf->enabled_mask;
		samplers->views.dirty_mask = samplers->views.enabled_mask;
		samplers->states.dirty_mask = samplers->states.enabled_mask;

		r600_constant_buffers_dirty(ctx, constbuf);
		r600_sampler_views_dirty(ctx, &samplers->views);
		r600_sampler_states_dirty(ctx, &samplers->states);
	}

	for (shader = 0; shader < ARRAY_SIZE(ctx->scratch_buffers); shader++) {
		ctx->scratch_buffers[shader].dirty = true;
	}

	r600_postflush_resume_features(&ctx->b);

	/* Re-emit the draw state. */
	ctx->last_primitive_type = -1;
	ctx->last_start_instance = -1;
	ctx->last_rast_prim      = -1;
	ctx->current_rast_prim   = -1;

	assert(!ctx->b.gfx.cs->prev_dw);
	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
}
示例#4
0
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
		const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
	unsigned i;

	/* make sure that the gfx ring is only one active */
	if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
		ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	/* emit config state */
	if (ctx->b.chip_class == EVERGREEN)
		r600_emit_atom(ctx, &ctx->config_state.atom);

	ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
	for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE,
						       RADEON_PRIO_SHADER_RW_BUFFER);

		radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			radeon_emit(cs, reloc);
		}

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, reloc);
	}
	if (ctx->keep_tiling_flags) {
		for (; i < 8 ; i++) {
			radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
						       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
		}
		for (; i < 12; i++) {
			radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
						       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
		}
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
					ctx->compute_cb_target_mask);


	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit constant buffer state */
	r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

	/* Emit sampler state */
	r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom);

	/* Emit sampler view (texture resource) state */
	r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
	 */
	ctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
		      R600_CONTEXT_INV_VERTEX_CACHE |
	              R600_CONTEXT_INV_TEX_CACHE;
	r600_flush_emit(ctx);
	ctx->b.flags = 0;

	if (ctx->b.chip_class >= CAYMAN) {
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4);
		/* DEALLOC_STATE prevents the GPU from hanging when a
		 * SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT
		 * with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set.
		 */
		cs->buf[cs->cdw++] = PKT3C(PKT3_DEALLOC_STATE, 0, 0);
		cs->buf[cs->cdw++] = 0;
	}

#if 0
	COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]);
	}
#endif

}
示例#5
0
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
		const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	unsigned flush_flags = 0;
	int i;
	struct r600_resource *onebo = NULL;
	struct evergreen_compute_resource *resources =
					ctx->cs_shader_state.shader->resources;

	/* make sure that the gfx ring is only one active */
	if (ctx->rings.dma.cs) {
		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE);

		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		r600_write_value(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		r600_write_value(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		r600_write_value(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		r600_write_value(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		r600_write_value(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			r600_write_value(cs, reloc);
		}

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, reloc);
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
					ctx->compute_cb_target_mask);


	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	for (i = 0; i < get_compute_resource_num(); i++) {
		if (resources[i].enabled) {
			int j;
			COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);

			for (j = 0; j < resources[i].cs_end; j++) {
				if (resources[i].do_reloc[j]) {
					assert(resources[i].bo);
					evergreen_emit_ctx_reloc(ctx,
						resources[i].bo,
						resources[i].usage);
				}

				cs->buf[cs->cdw++] = resources[i].cs[j];
			}

			if (resources[i].bo) {
				onebo = resources[i].bo;
				evergreen_emit_ctx_reloc(ctx,
					resources[i].bo,
					resources[i].usage);

				///special case for textures
				if (resources[i].do_reloc
					[resources[i].cs_end] == 2) {
					evergreen_emit_ctx_reloc(ctx,
						resources[i].bo,
						resources[i].usage);
				}
			}
		}
	}

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
	 */
	ctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
	r600_flush_emit(ctx);

#if 0
	COMPUTE_DBG("cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
	}
#endif

	flush_flags = RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE;
	if (ctx->keep_tiling_flags) {
		flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
	}

	ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags);

	ctx->pm4_dirty_cdwords = 0;
	ctx->flags = 0;

	COMPUTE_DBG("shader started\n");

	ctx->ws->buffer_wait(onebo->buf, 0);

	COMPUTE_DBG("...\n");

	ctx->streamout_start = TRUE;
	ctx->streamout_append_bitmask = ~0;

}