Example #1
static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
{
    struct si_descriptors *desc = &sctx->vertex_buffers;
    int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
    int i;

    for (i = 0; i < count; i++) {
        int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;

        if (vb >= Elements(sctx->vertex_buffer))
            continue;
        if (!sctx->vertex_buffer[vb].buffer)
            continue;

        r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                              (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
                              RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
    }

    if (!desc->buffer)
        return;
    r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                          desc->buffer, RADEON_USAGE_READ,
                          RADEON_PRIO_SHADER_DATA);
}
Example #2
static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
                                unsigned slot, struct pipe_sampler_view *view,
                                unsigned *view_desc)
{
    struct si_sampler_views *views = &sctx->samplers[shader].views;

    if (views->views[slot] == view)
        return;

    if (view) {
        struct si_sampler_view *rview =
            (struct si_sampler_view*)view;

        if (rview->resource)
            r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                                  rview->resource, RADEON_USAGE_READ,
                                  si_get_resource_ro_priority(rview->resource));

        pipe_sampler_view_reference(&views->views[slot], view);
        memcpy(views->desc.list + slot*8, view_desc, 8*4);
        views->desc.enabled_mask |= 1llu << slot;
    } else {
        pipe_sampler_view_reference(&views->views[slot], NULL);
        memcpy(views->desc.list + slot*8, null_descriptor, 8*4);
        views->desc.enabled_mask &= ~(1llu << slot);
    }

    views->desc.list_dirty = true;
}
Example #3
static bool si_upload_descriptors(struct si_context *sctx,
                                  struct si_descriptors *desc)
{
    unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
    void *ptr;

    if (!desc->list_dirty)
        return true;

    u_upload_alloc(sctx->b.uploader, 0, list_size,
                   &desc->buffer_offset,
                   (struct pipe_resource**)&desc->buffer, &ptr);
    if (!desc->buffer)
        return false; /* skip the draw call */

    util_memcpy_cpu_to_le32(ptr, desc->list, list_size);

    r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
                          RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

    desc->list_dirty = false;
    desc->pointer_dirty = true;
    si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
    return true;
}
Example #4
/**
 * Emit function for r600_cs_shader_state atom
 */
void evergreen_emit_cs_shader(
		struct r600_context *rctx,
		struct r600_atom *atom)
{
	struct r600_cs_shader_state *state =
					(struct r600_cs_shader_state*)atom;
	struct r600_pipe_compute *shader = state->shader;
	struct radeon_winsys_cs *cs = rctx->cs;
	uint64_t va;

	va = r600_resource_va(&rctx->screen->screen, &shader->shader_code_bo->b.b);

	r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
	r600_write_value(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
	r600_write_value(cs,           /* R_0288D4_SQ_PGM_RESOURCES_LS */
			S_0288D4_NUM_GPRS(shader->bc.ngpr)
			| S_0288D4_STACK_SIZE(shader->bc.nstack));
	r600_write_value(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */

	r600_write_value(cs, PKT3C(PKT3_NOP, 0, 0));
	r600_write_value(cs, r600_context_bo_reloc(rctx, shader->shader_code_bo,
							RADEON_USAGE_READ));

	rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
}
Example #5
static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
                                   struct pipe_constant_buffer *input)
{
    struct si_context *sctx = (struct si_context *)ctx;
    struct si_buffer_resources *buffers = &sctx->const_buffers[shader];

    if (shader >= SI_NUM_SHADERS)
        return;

    assert(slot < buffers->desc.num_elements);
    pipe_resource_reference(&buffers->buffers[slot], NULL);

    /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
     * with a NULL buffer). We need to use a dummy buffer instead. */
    if (sctx->b.chip_class == CIK &&
            (!input || (!input->buffer && !input->user_buffer)))
        input = &sctx->null_const_buf;

    if (input && (input->buffer || input->user_buffer)) {
        struct pipe_resource *buffer = NULL;
        uint64_t va;

        /* Upload the user buffer if needed. */
        if (input->user_buffer) {
            unsigned buffer_offset;

            si_upload_const_buffer(sctx,
                                   (struct r600_resource**)&buffer, input->user_buffer,
                                   input->buffer_size, &buffer_offset);
            if (!buffer) {
                /* Just unbind on failure. */
                si_set_constant_buffer(ctx, shader, slot, NULL);
                return;
            }
            va = r600_resource(buffer)->gpu_address + buffer_offset;
        } else {
            pipe_resource_reference(&buffer, input->buffer);
            va = r600_resource(buffer)->gpu_address + input->buffer_offset;
        }

        /* Set the descriptor. */
        uint32_t *desc = buffers->desc.list + slot*4;
        desc[0] = va;
        desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                  S_008F04_STRIDE(0);
        desc[2] = input->buffer_size;
        desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

        buffers->buffers[slot] = buffer;
        r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                              (struct r600_resource*)buffer,
                              buffers->shader_usage, buffers->priority);
        buffers->desc.enabled_mask |= 1llu << slot;
    } else {
        /* Clear the descriptor and turn the slot off. */
        memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
        buffers->desc.enabled_mask &= ~(1llu << slot);
    }

    buffers->desc.list_dirty = true;
}
Example #6
static void si_sampler_states_begin_new_cs(struct si_context *sctx,
        struct si_sampler_states *states)
{
    if (!states->desc.buffer)
        return;
    r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
                          RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
}
Example #7
static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
        struct si_buffer_resources *buffers)
{
    uint64_t mask = buffers->desc.enabled_mask;

    /* Add relocations to the CS. */
    while (mask) {
        int i = u_bit_scan64(&mask);

        r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                              (struct r600_resource*)buffers->buffers[i],
                              buffers->shader_usage, buffers->priority);
    }

    if (!buffers->desc.buffer)
        return;
    r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                          buffers->desc.buffer, RADEON_USAGE_READWRITE,
                          RADEON_PRIO_SHADER_DATA);
}
Example #8
File: cik_sdma.c  Project: olvaffe/mesa
static void cik_sdma_do_copy_buffer(struct si_context *ctx,
				    struct pipe_resource *dst,
				    struct pipe_resource *src,
				    uint64_t dst_offset,
				    uint64_t src_offset,
				    uint64_t size)
{
	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
	unsigned i, ncopy, csize;
	struct r600_resource *rdst = (struct r600_resource*)dst;
	struct r600_resource *rsrc = (struct r600_resource*)src;

	dst_offset += r600_resource(dst)->gpu_address;
	src_offset += r600_resource(src)->gpu_address;

	ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
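	/* Each linear-copy packet emitted in the loop below is 7 dwords, so
	 * reserve space for all ncopy packets up front. */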
	r600_need_dma_space(&ctx->b, ncopy * 7);

	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
			      RADEON_PRIO_MIN);
	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
			      RADEON_PRIO_MIN);

	for (i = 0; i < ncopy; i++) {
		csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE;
		cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
						     CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
						     0);
		cs->buf[cs->cdw++] = csize;
		cs->buf[cs->cdw++] = 0; /* src/dst endian swap */
		cs->buf[cs->cdw++] = src_offset;
		cs->buf[cs->cdw++] = src_offset >> 32;
		cs->buf[cs->cdw++] = dst_offset;
		cs->buf[cs->cdw++] = dst_offset >> 32;
		dst_offset += csize;
		src_offset += csize;
		size -= csize;
	}
}
Example #9
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
        struct si_sampler_views *views)
{
    uint64_t mask = views->desc.enabled_mask;

    /* Add relocations to the CS. */
    while (mask) {
        int i = u_bit_scan64(&mask);
        struct si_sampler_view *rview =
            (struct si_sampler_view*)views->views[i];

        if (!rview->resource)
            continue;

        r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                              rview->resource, RADEON_USAGE_READ,
                              si_get_resource_ro_priority(rview->resource));
    }

    if (!views->desc.buffer)
        return;
    r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
                          RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
}
Example #10
void si_trace_emit(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	struct radeon_winsys_cs *cs = sctx->cs;
	uint64_t va;

	va = r600_resource_va(&sscreen->screen, (void*)sscreen->b.trace_bo);
	r600_context_bo_reloc(sctx, sscreen->b.trace_bo, RADEON_USAGE_READWRITE);
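	/* WRITE_DATA: have the ME write the current command-stream position
	 * and the screen-wide CS counter into the trace buffer, for post-mortem
	 * debugging of the command stream. */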
	cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
	cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
				PKT3_WRITE_DATA_WR_CONFIRM |
				PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
	cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
	cs->buf[cs->cdw++] = cs->cdw;
	cs->buf[cs->cdw++] = sscreen->b.cs_count;
}
Example #11
void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
{
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
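	/* Register every buffer referenced by this PM4 state with the CS
	 * before its pre-built packets are copied in. */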
	for (int i = 0; i < state->nbo; ++i) {
		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
				      state->bo_usage[i], state->bo_priority[i]);
	}

	memcpy(&cs->buf[cs->cdw], state->pm4, state->ndw * 4);

	for (int i = 0; i < state->nrelocs; ++i) {
		cs->buf[cs->cdw + state->relocs[i]] += cs->cdw << 2;
	}

	cs->cdw += state->ndw;

#if SI_TRACE_CS
	if (sctx->screen->b.trace_bo) {
		si_trace_emit(sctx);
	}
#endif
}
Example #12
/**
 * Emit function for r600_cs_shader_state atom
 */
void evergreen_emit_cs_shader(
		struct r600_context *rctx,
		struct r600_atom *atom)
{
	struct r600_cs_shader_state *state =
					(struct r600_cs_shader_state*)atom;
	struct r600_pipe_compute *shader = state->shader;
	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
	uint64_t va;
	struct r600_resource *code_bo;
	unsigned ngpr, nstack;

#if HAVE_LLVM < 0x0306
	struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
	code_bo = kernel->code_bo;
	va = kernel->code_bo->gpu_address;
	ngpr = kernel->bc.ngpr;
	nstack = kernel->bc.nstack;
#else
	code_bo = shader->code_bo;
	va = shader->code_bo->gpu_address + state->pc;
	ngpr = shader->bc.ngpr;
	nstack = shader->bc.nstack;
#endif

	r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
	radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
	radeon_emit(cs,           /* R_0288D4_SQ_PGM_RESOURCES_LS */
			S_0288D4_NUM_GPRS(ngpr)
			| S_0288D4_STACK_SIZE(nstack));
	radeon_emit(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */

	radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
					      code_bo, RADEON_USAGE_READ,
					      RADEON_PRIO_SHADER_DATA));
}
Example #13
/**
 * Emit function for r600_cs_shader_state atom
 */
void evergreen_emit_cs_shader(
		struct r600_context *rctx,
		struct r600_atom *atom)
{
	struct r600_cs_shader_state *state =
					(struct r600_cs_shader_state*)atom;
	struct r600_pipe_compute *shader = state->shader;
	struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
	uint64_t va;

	va = r600_resource_va(&rctx->screen->b.b, &kernel->code_bo->b.b);

	r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
	radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
	radeon_emit(cs,           /* R_0288D4_SQ_PGM_RESOURCES_LS */
			S_0288D4_NUM_GPRS(kernel->bc.ngpr)
			| S_0288D4_STACK_SIZE(kernel->bc.nstack));
	radeon_emit(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */

	radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
							kernel->code_bo, RADEON_USAGE_READ));
}
Example #14
File: cik_sdma.c  Project: olvaffe/mesa
static void cik_sdma_copy_tile(struct si_context *ctx,
			       struct pipe_resource *dst,
			       unsigned dst_level,
			       struct pipe_resource *src,
			       unsigned src_level,
			       unsigned y,
			       unsigned copy_height,
			       unsigned y_align,
			       unsigned pitch,
			       unsigned bpe)
{
	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
	struct si_screen *sscreen = ctx->screen;
	struct r600_texture *rsrc = (struct r600_texture*)src;
	struct r600_texture *rdst = (struct r600_texture*)dst;
	struct r600_texture *rlinear, *rtiled;
	unsigned linear_lvl, tiled_lvl;
	unsigned array_mode, lbpe, pitch_tile_max, slice_tile_max, size;
	unsigned ncopy, height, cheight, detile, i, src_mode, dst_mode;
	unsigned sub_op, bank_h, bank_w, mt_aspect, nbanks, tile_split, mt;
	uint64_t base, addr;
	unsigned pipe_config, tile_mode_index;

	dst_mode = rdst->surface.level[dst_level].mode;
	src_mode = rsrc->surface.level[src_level].mode;
	/* downcast linear aligned to linear to simplify test */
	src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
	dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
	assert(dst_mode != src_mode);
	assert(src_mode == RADEON_SURF_MODE_LINEAR || dst_mode == RADEON_SURF_MODE_LINEAR);

	sub_op = CIK_SDMA_COPY_SUB_OPCODE_TILED;
	lbpe = util_logbase2(bpe);
	pitch_tile_max = ((pitch / bpe) / 8) - 1;

	detile = dst_mode == RADEON_SURF_MODE_LINEAR;
	rlinear = detile ? rdst : rsrc;
	rtiled = detile ? rsrc : rdst;
	linear_lvl = detile ? dst_level : src_level;
	tiled_lvl = detile ? src_level : dst_level;

	assert(!util_format_is_depth_and_stencil(rtiled->resource.b.b.format));

	array_mode = si_array_mode(rtiled->surface.level[tiled_lvl].mode);
	slice_tile_max = (rtiled->surface.level[tiled_lvl].nblk_x *
			  rtiled->surface.level[tiled_lvl].nblk_y) / (8*8) - 1;
	height = rlinear->surface.level[linear_lvl].nblk_y;
	base = rtiled->surface.level[tiled_lvl].offset;
	addr = rlinear->surface.level[linear_lvl].offset;
	bank_h = cik_bank_wh(rtiled->surface.bankh);
	bank_w = cik_bank_wh(rtiled->surface.bankw);
	mt_aspect = cik_macro_tile_aspect(rtiled->surface.mtilea);
	tile_split = cik_tile_split(rtiled->surface.tile_split);
	tile_mode_index = si_tile_mode_index(rtiled, tiled_lvl, false);
	nbanks = si_num_banks(sscreen, rtiled);
	base += rtiled->resource.gpu_address;
	addr += rlinear->resource.gpu_address;

	pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
	mt = cik_micro_tile_mode(sscreen, tile_mode_index);

	size = (copy_height * pitch) / 4;
	cheight = copy_height;
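	/* Clamp the per-packet height so its dword count stays within
	 * CIK_SDMA_COPY_MAX_SIZE, and round it down to a multiple of y_align. */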
	if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) {
		cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch;
		cheight &= ~(y_align - 1);
	}
	ncopy = (copy_height + cheight - 1) / cheight;
	r600_need_dma_space(&ctx->b, ncopy * 12);

	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
			      RADEON_USAGE_READ, RADEON_PRIO_MIN);
	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
			      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);

	copy_height = size * 4 / pitch;
	for (i = 0; i < ncopy; i++) {
		cheight = copy_height;
		if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) {
			cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch;
			cheight &= ~(y_align - 1);
		}
		size = (cheight * pitch) / 4;

		cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
						     sub_op, detile << 15);
		cs->buf[cs->cdw++] = base;
		cs->buf[cs->cdw++] = base >> 32;
		cs->buf[cs->cdw++] = ((height - 1) << 16) | pitch_tile_max;
		cs->buf[cs->cdw++] = slice_tile_max;
		cs->buf[cs->cdw++] = (pipe_config << 26) | (mt_aspect << 24) |
			(nbanks << 21) | (bank_h << 18) | (bank_w << 15) |
			(tile_split << 11) | (mt << 8) | (array_mode << 3) |
			lbpe;
		cs->buf[cs->cdw++] = y << 16; /* | x */
		cs->buf[cs->cdw++] = 0; /* z */
		cs->buf[cs->cdw++] = addr & 0xfffffffc;
		cs->buf[cs->cdw++] = addr >> 32;
		cs->buf[cs->cdw++] = (pitch / bpe) - 1;
		cs->buf[cs->cdw++] = size;

		copy_height -= cheight;
		y += cheight;
	}
}
Example #15
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
    struct si_descriptors *desc = &sctx->vertex_buffers;
    bool bound[SI_NUM_VERTEX_BUFFERS] = {};
    unsigned i, count;
    uint64_t va;
    uint32_t *ptr;

    if (!sctx->vertex_buffers_dirty)
        return true;
    if (!sctx->vertex_elements)
        return true;

    count = sctx->vertex_elements->count;
    if (!count)
        return true;

    /* Vertex buffer descriptors are the only ones which are uploaded
     * directly through a staging buffer and don't go through
     * the fine-grained upload path.
     */
    u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
                   (struct pipe_resource**)&desc->buffer, (void**)&ptr);
    if (!desc->buffer)
        return false;

    r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                          desc->buffer, RADEON_USAGE_READ,
                          RADEON_PRIO_SHADER_DATA);

    assert(count <= SI_NUM_VERTEX_BUFFERS);

    for (i = 0; i < count; i++) {
        struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
        struct pipe_vertex_buffer *vb;
        struct r600_resource *rbuffer;
        unsigned offset;
        uint32_t *desc = &ptr[i*4];

        if (ve->vertex_buffer_index >= Elements(sctx->vertex_buffer)) {
            memset(desc, 0, 16);
            continue;
        }

        vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
        rbuffer = (struct r600_resource*)vb->buffer;
        if (rbuffer == NULL) {
            memset(desc, 0, 16);
            continue;
        }

        offset = vb->buffer_offset + ve->src_offset;
        va = rbuffer->gpu_address + offset;

        /* Fill in T# buffer resource description */
        desc[0] = va & 0xFFFFFFFF;
        desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                  S_008F04_STRIDE(vb->stride);

        if (sctx->b.chip_class <= CIK && vb->stride)
            /* Round up by rounding down and adding 1 */
            desc[2] = (vb->buffer->width0 - offset -
                       sctx->vertex_elements->format_size[i]) /
                      vb->stride + 1;
        else
            desc[2] = vb->buffer->width0 - offset;
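        /* Worked example with illustrative values: width0=1000, offset=0,
         * format_size=12, stride=16 on <= CIK gives
         * (1000 - 0 - 12) / 16 + 1 = 62 fetchable vertices. */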

        desc[3] = sctx->vertex_elements->rsrc_word3[i];

        if (!bound[ve->vertex_buffer_index]) {
            r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                                  (struct r600_resource*)vb->buffer,
                                  RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
            bound[ve->vertex_buffer_index] = true;
        }
    }

    /* Don't flush the const cache. It would have a very negative effect
     * on performance (confirmed by testing). New descriptors are always
     * uploaded to a fresh new buffer, so I don't think flushing the const
     * cache is needed. */
    desc->pointer_dirty = true;
    si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
    sctx->vertex_buffers_dirty = false;
    return true;
}
Example #16
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
		const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	unsigned flush_flags = 0;
	int i;
	struct r600_resource *onebo = NULL;
	struct evergreen_compute_resource *resources =
					ctx->cs_shader_state.shader->resources;

	/* make sure that the gfx ring is only one active */
	if (ctx->rings.dma.cs) {
		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE);

		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		r600_write_value(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		r600_write_value(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		r600_write_value(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		r600_write_value(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		r600_write_value(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			r600_write_value(cs, reloc);
		}

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, reloc);
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
					ctx->compute_cb_target_mask);


	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	for (i = 0; i < get_compute_resource_num(); i++) {
		if (resources[i].enabled) {
			int j;
			COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);

			for (j = 0; j < resources[i].cs_end; j++) {
				if (resources[i].do_reloc[j]) {
					assert(resources[i].bo);
					evergreen_emit_ctx_reloc(ctx,
						resources[i].bo,
						resources[i].usage);
				}

				cs->buf[cs->cdw++] = resources[i].cs[j];
			}

			if (resources[i].bo) {
				onebo = resources[i].bo;
				evergreen_emit_ctx_reloc(ctx,
					resources[i].bo,
					resources[i].usage);

				/* special case for textures */
				if (resources[i].do_reloc
					[resources[i].cs_end] == 2) {
					evergreen_emit_ctx_reloc(ctx,
						resources[i].bo,
						resources[i].usage);
				}
			}
		}
	}

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
	 */
	ctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
	r600_flush_emit(ctx);

#if 0
	COMPUTE_DBG("cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
	}
#endif

	flush_flags = RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE;
	if (ctx->keep_tiling_flags) {
		flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
	}

	ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags);

	ctx->pm4_dirty_cdwords = 0;
	ctx->flags = 0;

	COMPUTE_DBG("shader started\n");

	ctx->ws->buffer_wait(onebo->buf, 0);

	COMPUTE_DBG("...\n");

	ctx->streamout_start = TRUE;
	ctx->streamout_append_bitmask = ~0;

}
Example #17
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
		const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	unsigned flush_flags = 0;
	int i;

	/* make sure that the gfx ring is only one active */
	if (ctx->rings.dma.cs) {
		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE);

		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		r600_write_value(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		r600_write_value(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		r600_write_value(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		r600_write_value(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		r600_write_value(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		r600_write_value(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			r600_write_value(cs, reloc);
		}

		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		r600_write_value(cs, reloc);
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
					ctx->compute_cb_target_mask);


	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit constant buffer state */
	r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
	 */
	ctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
	r600_flush_emit(ctx);

#if 0
	COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, ctx->cs->buf[i]);
	}
#endif

	flush_flags = RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE;
	if (ctx->keep_tiling_flags) {
		flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
	}

	ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags, ctx->screen->cs_count++);

	ctx->flags = 0;

	COMPUTE_DBG(ctx->screen, "shader started\n");
}
Example #18
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
		const uint *grid_layout)
{
	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
	int i;

	/* make sure that the gfx ring is only one active */
	if (ctx->b.rings.dma.cs) {
		ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
	}

	/* Initialize all the compute-related registers.
	 *
	 * See evergreen_init_atom_start_compute_cs() in this file for the list
	 * of registers initialized by the start_compute_cs_cmd atom.
	 */
	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

	ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
	r600_flush_emit(ctx);

	/* Emit colorbuffers. */
	/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
	for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
		unsigned reloc = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
						       (struct r600_resource*)cb->base.texture,
						       RADEON_USAGE_READWRITE);

		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb->cb_color_info);	/* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, reloc);

		if (!ctx->keep_tiling_flags) {
			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
			radeon_emit(cs, reloc);
		}

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, reloc);
	}
	if (ctx->keep_tiling_flags) {
		for (; i < 8 ; i++) {
			r600_write_compute_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
						       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
		}
		for (; i < 12; i++) {
			r600_write_compute_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
						       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
		}
	}

	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
					ctx->compute_cb_target_mask);


	/* Emit vertex buffer state */
	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

	/* Emit constant buffer state */
	r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

	/* Emit compute shader state */
	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

	/* Emit dispatch state and dispatch packet */
	evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

	/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
	 */
	ctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
		      R600_CONTEXT_INV_VERTEX_CACHE |
	              R600_CONTEXT_INV_TEX_CACHE;
	r600_flush_emit(ctx);
	ctx->b.flags = 0;

	if (ctx->b.chip_class >= CAYMAN) {
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4);
		/* DEALLOC_STATE prevents the GPU from hanging when a
		 * SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT
		 * with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set.
		 */
		cs->buf[cs->cdw++] = PKT3C(PKT3_DEALLOC_STATE, 0, 0);
		cs->buf[cs->cdw++] = 0;
	}

#if 0
	COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
	for (i = 0; i < cs->cdw; i++) {
		COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]);
	}
#endif

}