Example #1
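/* Decompress dirty depth or stencil mip levels in place by blitting each
 * layer with the in-place DB flush state, then clear the dirty_level_mask
 * bits of levels whose layers were all flushed. */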
static void si_blit_decompress_depth_in_place(struct si_context *sctx,
                                              struct r600_texture *texture,
					      bool is_stencil_sampler,
                                              unsigned first_level, unsigned last_level,
                                              unsigned first_layer, unsigned last_layer)
{
	struct pipe_surface *zsurf, surf_tmpl = {{0}};
	unsigned layer, max_layer, checked_last_layer, level;
	unsigned *dirty_level_mask;

	if (is_stencil_sampler) {
		sctx->db_flush_stencil_inplace = true;
		dirty_level_mask = &texture->stencil_dirty_level_mask;
	} else {
		sctx->db_flush_depth_inplace = true;
		dirty_level_mask = &texture->dirty_level_mask;
	}
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	surf_tmpl.format = texture->resource.b.b.format;

	for (level = first_level; level <= last_level; level++) {
		if (!(*dirty_level_mask & (1 << level)))
			continue;

		surf_tmpl.u.tex.level = level;

		/* The smaller the mipmap level, the fewer layers there are,
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&texture->resource.b.b, level);
		checked_last_layer = last_layer < max_layer ? last_layer : max_layer;

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b, &surf_tmpl);

			si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
			util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
							  sctx->custom_dsa_flush,
							  1.0f);
			si_blitter_end(&sctx->b.b);

			pipe_surface_reference(&zsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer == max_layer) {
			*dirty_level_mask &= ~(1 << level);
		}
	}

	sctx->db_flush_depth_inplace = false;
	sctx->db_flush_stencil_inplace = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
Example #2
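/* Upload the descriptor list to a fresh GPU buffer if it is dirty, add a
 * relocation for the buffer, and mark the shader userdata atom dirty so
 * the new descriptor pointer gets re-emitted. */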
static bool si_upload_descriptors(struct si_context *sctx,
                                  struct si_descriptors *desc)
{
    unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
    void *ptr;

    if (!desc->list_dirty)
        return true;

    u_upload_alloc(sctx->b.uploader, 0, list_size,
                   &desc->buffer_offset,
                   (struct pipe_resource**)&desc->buffer, &ptr);
    if (!desc->buffer)
        return false; /* skip the draw call */

    util_memcpy_cpu_to_le32(ptr, desc->list, list_size);

    r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
                          RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

    desc->list_dirty = false;
    desc->pointer_dirty = true;
    si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
    return true;
}
Example #3
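/* Variant that uses the constant engine (CE) IB when available: only the
 * dirty descriptor ranges are written to CE RAM and then uploaded via
 * si_ce_upload; otherwise the whole list goes through the upload manager.
 * Marks the given atom dirty if one was passed. */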
static bool si_upload_descriptors(struct si_context *sctx,
				  struct si_descriptors *desc,
				  struct r600_atom * atom)
{
	unsigned list_size = desc->num_elements * desc->element_dw_size * 4;

	if (!desc->dirty_mask)
		return true;

	if (sctx->ce_ib) {
		uint32_t const* list = (uint32_t const*)desc->list;

		if (desc->ce_ram_dirty)
			si_reinitialize_ce_ram(sctx, desc);

		while(desc->dirty_mask) {
			int begin, count;
			u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
						     &count);

			begin *= desc->element_dw_size;
			count *= desc->element_dw_size;

			radeon_emit(sctx->ce_ib,
			            PKT3(PKT3_WRITE_CONST_RAM, count, 0));
			radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
			radeon_emit_array(sctx->ce_ib, list + begin, count);
		}

		if (!si_ce_upload(sctx, desc->ce_offset, list_size,
		                           &desc->buffer_offset, &desc->buffer))
			return false;
	} else {
		void *ptr;

		u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
			&desc->buffer_offset,
			(struct pipe_resource**)&desc->buffer, &ptr);
		if (!desc->buffer)
			return false; /* skip the draw call */

		util_memcpy_cpu_to_le32(ptr, desc->list, list_size);

		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
	                            RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
	}
	desc->pointer_dirty = true;
	desc->dirty_mask = 0;

	if (atom)
		si_mark_atom_dirty(sctx, atom);

	return true;
}
Example #4
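/* Store the new scissor states and mark the scissor atom dirty, but only
 * when the bound rasterizer state actually has the scissor test enabled. */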
static void si_set_scissor_states(struct pipe_context *pctx,
				  unsigned start_slot,
				  unsigned num_scissors,
				  const struct pipe_scissor_state *state)
{
	struct si_context *ctx = (struct si_context *)pctx;
	int i;

	for (i = 0; i < num_scissors; i++)
		ctx->scissors.states[start_slot + i] = state[i];

	if (!ctx->queued.named.rasterizer ||
	    !ctx->queued.named.rasterizer->scissor_enable)
		return;

	ctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
}
Example #5
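/* Build and upload the T# buffer descriptors for all enabled vertex
 * buffers, add relocations for every referenced buffer, and mark the
 * shader userdata atom dirty. */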
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
    struct si_descriptors *desc = &sctx->vertex_buffers;
    bool bound[SI_NUM_VERTEX_BUFFERS] = {};
    unsigned i, count;
    uint64_t va;
    uint32_t *ptr;

    if (!sctx->vertex_buffers_dirty)
        return true;
    /* Check the vertex_elements pointer before dereferencing it. */
    if (!sctx->vertex_elements || !sctx->vertex_elements->count)
        return true;

    count = sctx->vertex_elements->count;

    /* Vertex buffer descriptors are the only ones which are uploaded
     * directly through a staging buffer and don't go through
     * the fine-grained upload path.
     */
    u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
                   (struct pipe_resource**)&desc->buffer, (void**)&ptr);
    if (!desc->buffer)
        return false;

    r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                          desc->buffer, RADEON_USAGE_READ,
                          RADEON_PRIO_SHADER_DATA);

    assert(count <= SI_NUM_VERTEX_BUFFERS);

    for (i = 0; i < count; i++) {
        struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
        struct pipe_vertex_buffer *vb;
        struct r600_resource *rbuffer;
        unsigned offset;
        uint32_t *desc = &ptr[i*4];

        if (ve->vertex_buffer_index >= Elements(sctx->vertex_buffer)) {
            memset(desc, 0, 16);
            continue;
        }

        vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
        rbuffer = (struct r600_resource*)vb->buffer;
        if (rbuffer == NULL) {
            memset(desc, 0, 16);
            continue;
        }

        offset = vb->buffer_offset + ve->src_offset;
        va = rbuffer->gpu_address + offset;

        /* Fill in T# buffer resource description */
        desc[0] = va & 0xFFFFFFFF;
        desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                  S_008F04_STRIDE(vb->stride);

        if (sctx->b.chip_class <= CIK && vb->stride)
            /* Round up by rounding down and adding 1 */
            desc[2] = (vb->buffer->width0 - offset -
                       sctx->vertex_elements->format_size[i]) /
                      vb->stride + 1;
        else
            desc[2] = vb->buffer->width0 - offset;

        desc[3] = sctx->vertex_elements->rsrc_word3[i];

        if (!bound[ve->vertex_buffer_index]) {
            r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                                  (struct r600_resource*)vb->buffer,
                                  RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
            bound[ve->vertex_buffer_index] = true;
        }
    }

    /* Don't flush the const cache. It would have a very negative effect
     * on performance (confirmed by testing). New descriptors are always
     * uploaded to a fresh new buffer, so I don't think flushing the const
     * cache is needed. */
    desc->pointer_dirty = true;
    si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
    sctx->vertex_buffers_dirty = false;
    return true;
}
Example #6
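/* Begin a new command stream: (re)create the trace buffer in debug mode,
 * request cache invalidations, re-emit the init config, and mark all state
 * atoms dirty so the full state is re-emitted before the first draw. */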
void si_begin_new_cs(struct si_context *ctx)
{
	if (ctx->is_debug) {
		uint32_t zero = 0;

		/* Create a buffer used for writing trace IDs and initialize it to 0. */
		assert(!ctx->trace_buf);
		ctx->trace_buf = (struct r600_resource*)
				 pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM,
						    PIPE_USAGE_STAGING, 4);
		if (ctx->trace_buf)
			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
						    0, sizeof(zero), &zero);
		ctx->trace_id = 0;
	}

	if (ctx->trace_buf)
		si_trace_emit(ctx);

	/* Flush read caches at the beginning of CS. */
	ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
			SI_CONTEXT_INV_VMEM_L1 |
			SI_CONTEXT_INV_GLOBAL_L2 |
			SI_CONTEXT_INV_SMEM_L1 |
			SI_CONTEXT_INV_ICACHE;

	/* Set all valid groups as dirty so they get re-emitted on the
	 * next draw command.
	 */
	si_pm4_reset_emitted(ctx);

	/* The CS initialization should be emitted before everything else. */
	si_pm4_emit(ctx, ctx->init_config);
	if (ctx->init_config_gs_rings)
		si_pm4_emit(ctx, ctx->init_config_gs_rings);

	ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
	ctx->framebuffer.dirty_zsbuf = true;
	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);

	si_mark_atom_dirty(ctx, &ctx->clip_regs);
	si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
	si_mark_atom_dirty(ctx, &ctx->msaa_config);
	si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
	si_mark_atom_dirty(ctx, &ctx->cb_render_state);
	si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
	si_mark_atom_dirty(ctx, &ctx->db_render_state);
	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
	si_mark_atom_dirty(ctx, &ctx->spi_map);
	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
	si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
	si_all_descriptors_begin_new_cs(ctx);

	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
	si_mark_atom_dirty(ctx, &ctx->viewports.atom);

	r600_postflush_resume_features(&ctx->b);

	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;

	/* Invalidate various draw states so that they are emitted before
	 * the first draw call. */
	si_invalidate_draw_sh_constants(ctx);
	ctx->last_primitive_restart_en = -1;
	ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
	ctx->last_gs_out_prim = -1;
	ctx->last_prim = -1;
	ctx->last_multi_vgt_param = -1;
	ctx->last_ls_hs_config = -1;
	ctx->last_rast_prim = -1;
	ctx->last_sc_line_stipple = ~0;
	ctx->emit_scratch_reloc = true;
	ctx->last_ls = NULL;
	ctx->last_tcs = NULL;
	ctx->last_tes_sh_base = -1;
	ctx->last_num_tcs_input_cp = -1;
}
Example #7
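/* Clear the bound framebuffer: try fast color clears and HTILE-based fast
 * depth/stencil clears first, then fall back to the blitter for whatever
 * remains. */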
static void si_clear(struct pipe_context *ctx, unsigned buffers,
		     const union pipe_color_union *color,
		     double depth, unsigned stencil)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
	struct pipe_surface *zsbuf = fb->zsbuf;
	struct r600_texture *zstex =
		zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;

	if (buffers & PIPE_CLEAR_COLOR) {
		evergreen_do_fast_color_clear(&sctx->b, fb,
					      &sctx->framebuffer.atom, &buffers,
					      &sctx->framebuffer.dirty_cbufs,
					      color);
		if (!buffers)
			return; /* all buffers have been fast cleared */
	}

	if (buffers & PIPE_CLEAR_COLOR) {
		int i;

		/* These buffers cannot use fast clear, make sure to disable expansion. */
		for (i = 0; i < fb->nr_cbufs; i++) {
			struct r600_texture *tex;

			/* If not clearing this buffer, skip. */
			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
				continue;

			if (!fb->cbufs[i])
				continue;

			tex = (struct r600_texture *)fb->cbufs[i]->texture;
			if (tex->fmask.size == 0)
				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
		}
	}

	if (zstex && zstex->htile_buffer &&
	    zsbuf->u.tex.level == 0 &&
	    zsbuf->u.tex.first_layer == 0 &&
	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
		if (buffers & PIPE_CLEAR_DEPTH) {
			/* Need to disable EXPCLEAR temporarily if clearing
			 * to a new value. */
			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
				sctx->db_depth_disable_expclear = true;
			}

			zstex->depth_clear_value = depth;
			sctx->framebuffer.dirty_zsbuf = true;
			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
			sctx->db_depth_clear = true;
			si_mark_atom_dirty(sctx, &sctx->db_render_state);
		}

		if (buffers & PIPE_CLEAR_STENCIL) {
			stencil &= 0xff;

			/* Need to disable EXPCLEAR temporarily if clearing
			 * to a new value. */
			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
				sctx->db_stencil_disable_expclear = true;
			}

			zstex->stencil_clear_value = stencil;
			sctx->framebuffer.dirty_zsbuf = true;
			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
			sctx->db_stencil_clear = true;
			si_mark_atom_dirty(sctx, &sctx->db_render_state);
		}
	}

	si_blitter_begin(ctx, SI_CLEAR);
	util_blitter_clear(sctx->blitter, fb->width, fb->height,
			   util_framebuffer_get_num_layers(fb),
			   buffers, color, depth, stencil);
	si_blitter_end(ctx);

	if (sctx->db_depth_clear) {
		sctx->db_depth_clear = false;
		sctx->db_depth_disable_expclear = false;
		zstex->depth_cleared = true;
		si_mark_atom_dirty(sctx, &sctx->db_render_state);
	}

	if (sctx->db_stencil_clear) {
		sctx->db_stencil_clear = false;
		sctx->db_stencil_disable_expclear = false;
		zstex->stencil_cleared = true;
		si_mark_atom_dirty(sctx, &sctx->db_render_state);
	}
}
Example #8
/* Helper function for si_blit_decompress_zs_in_place.
 */
static void
si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
				      struct r600_texture *texture,
				      unsigned planes, unsigned level_mask,
				      unsigned first_layer, unsigned last_layer)
{
	struct pipe_surface *zsurf, surf_tmpl = {{0}};
	unsigned layer, max_layer, checked_last_layer;
	unsigned fully_decompressed_mask = 0;

	if (!level_mask)
		return;

	if (planes & PIPE_MASK_S)
		sctx->db_flush_stencil_inplace = true;
	if (planes & PIPE_MASK_Z)
		sctx->db_flush_depth_inplace = true;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	surf_tmpl.format = texture->resource.b.b.format;

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		surf_tmpl.u.tex.level = level;

		/* The smaller the mipmap level, the fewer layers there are,
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&texture->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b, &surf_tmpl);

			si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
			util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
							  sctx->custom_dsa_flush,
							  1.0f);
			si_blitter_end(&sctx->b.b);

			pipe_surface_reference(&zsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer == max_layer) {
			fully_decompressed_mask |= 1u << level;
		}
	}

	if (planes & PIPE_MASK_Z)
		texture->dirty_level_mask &= ~fully_decompressed_mask;
	if (planes & PIPE_MASK_S)
		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;

	sctx->db_flush_depth_inplace = false;
	sctx->db_flush_stencil_inplace = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
Example #9
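/* Decompress depth/stencil by copying from the compressed texture into a
 * staging (flushed) texture, one level/layer/sample at a time, through the
 * DB->CB copy path with a custom DSA state. */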
static void si_blit_decompress_depth(struct pipe_context *ctx,
				     struct r600_texture *texture,
				     struct r600_texture *staging,
				     unsigned first_level, unsigned last_level,
				     unsigned first_layer, unsigned last_layer,
				     unsigned first_sample, unsigned last_sample)
{
	struct si_context *sctx = (struct si_context *)ctx;
	unsigned layer, level, sample, checked_last_layer, max_layer;
	float depth = 1.0f;
	const struct util_format_description *desc;

	assert(staging != NULL && "use si_blit_decompress_zs_in_place instead");

	desc = util_format_description(staging->resource.b.b.format);

	if (util_format_has_depth(desc))
		sctx->dbcb_depth_copy_enabled = true;
	if (util_format_has_stencil(desc))
		sctx->dbcb_stencil_copy_enabled = true;

	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

	for (level = first_level; level <= last_level; level++) {
		/* The smaller the mipmap level, the fewer layers there are,
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&texture->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			for (sample = first_sample; sample <= last_sample; sample++) {
				struct pipe_surface *zsurf, *cbsurf, surf_tmpl;

				sctx->dbcb_copy_sample = sample;
				si_mark_atom_dirty(sctx, &sctx->db_render_state);

				surf_tmpl.format = texture->resource.b.b.format;
				surf_tmpl.u.tex.level = level;
				surf_tmpl.u.tex.first_layer = layer;
				surf_tmpl.u.tex.last_layer = layer;

				zsurf = ctx->create_surface(ctx, &texture->resource.b.b, &surf_tmpl);

				surf_tmpl.format = staging->resource.b.b.format;
				cbsurf = ctx->create_surface(ctx,
						(struct pipe_resource*)staging, &surf_tmpl);

				si_blitter_begin(ctx, SI_DECOMPRESS);
				util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
								  sctx->custom_dsa_flush, depth);
				si_blitter_end(ctx);

				pipe_surface_reference(&zsurf, NULL);
				pipe_surface_reference(&cbsurf, NULL);
			}
		}
	}

	sctx->dbcb_depth_copy_enabled = false;
	sctx->dbcb_stencil_copy_enabled = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
Example #10
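/* Depth decompression blit that can copy either into an explicit staging
 * texture or into the texture's own flushed_depth_texture, clearing
 * dirty_level_mask bits for levels that were flushed in full. */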
static void si_blit_decompress_depth(struct pipe_context *ctx,
				     struct r600_texture *texture,
				     struct r600_texture *staging,
				     unsigned first_level, unsigned last_level,
				     unsigned first_layer, unsigned last_layer,
				     unsigned first_sample, unsigned last_sample)
{
	struct si_context *sctx = (struct si_context *)ctx;
	unsigned layer, level, sample, checked_last_layer, max_layer, max_sample;
	float depth = 1.0f;
	const struct util_format_description *desc;
	struct r600_texture *flushed_depth_texture = staging ?
			staging : texture->flushed_depth_texture;

	if (!staging && !texture->dirty_level_mask)
		return;

	max_sample = u_max_sample(&texture->resource.b.b);

	desc = util_format_description(flushed_depth_texture->resource.b.b.format);

	if (util_format_has_depth(desc))
		sctx->dbcb_depth_copy_enabled = true;
	if (util_format_has_stencil(desc))
		sctx->dbcb_stencil_copy_enabled = true;

	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

	for (level = first_level; level <= last_level; level++) {
		if (!staging && !(texture->dirty_level_mask & (1 << level)))
			continue;

		/* The smaller the mipmap level, the fewer layers there are,
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&texture->resource.b.b, level);
		checked_last_layer = last_layer < max_layer ? last_layer : max_layer;

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			for (sample = first_sample; sample <= last_sample; sample++) {
				struct pipe_surface *zsurf, *cbsurf, surf_tmpl;

				sctx->dbcb_copy_sample = sample;
				si_mark_atom_dirty(sctx, &sctx->db_render_state);

				surf_tmpl.format = texture->resource.b.b.format;
				surf_tmpl.u.tex.level = level;
				surf_tmpl.u.tex.first_layer = layer;
				surf_tmpl.u.tex.last_layer = layer;

				zsurf = ctx->create_surface(ctx, &texture->resource.b.b, &surf_tmpl);

				surf_tmpl.format = flushed_depth_texture->resource.b.b.format;
				cbsurf = ctx->create_surface(ctx,
						(struct pipe_resource*)flushed_depth_texture, &surf_tmpl);

				si_blitter_begin(ctx, SI_DECOMPRESS);
				util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
								  sctx->custom_dsa_flush, depth);
				si_blitter_end(ctx);

				pipe_surface_reference(&zsurf, NULL);
				pipe_surface_reference(&cbsurf, NULL);
			}
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case can occur though. */
		if (!staging &&
		    first_layer == 0 && last_layer == max_layer &&
		    first_sample == 0 && last_sample == max_sample) {
			texture->dirty_level_mask &= ~(1 << level);
		}
	}

	sctx->dbcb_depth_copy_enabled = false;
	sctx->dbcb_stencil_copy_enabled = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
Example #11
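/* Copy the requested depth/stencil planes from src to dst through the
 * DB->CB copy path and return a mask of the mip levels that were copied
 * in full. */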
static unsigned
si_blit_dbcb_copy(struct si_context *sctx,
		  struct r600_texture *src,
		  struct r600_texture *dst,
		  unsigned planes, unsigned level_mask,
		  unsigned first_layer, unsigned last_layer,
		  unsigned first_sample, unsigned last_sample)
{
	struct pipe_surface surf_tmpl = {{0}};
	unsigned layer, sample, checked_last_layer, max_layer;
	unsigned fully_copied_levels = 0;

	if (planes & PIPE_MASK_Z)
		sctx->dbcb_depth_copy_enabled = true;
	if (planes & PIPE_MASK_S)
		sctx->dbcb_stencil_copy_enabled = true;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		/* The smaller the mipmap level, the fewer layers there are,
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&src->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		surf_tmpl.u.tex.level = level;

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			struct pipe_surface *zsurf, *cbsurf;

			surf_tmpl.format = src->resource.b.b.format;
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b, &surf_tmpl);

			surf_tmpl.format = dst->resource.b.b.format;
			cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b, &surf_tmpl);

			for (sample = first_sample; sample <= last_sample; sample++) {
				if (sample != sctx->dbcb_copy_sample) {
					sctx->dbcb_copy_sample = sample;
					si_mark_atom_dirty(sctx, &sctx->db_render_state);
				}

				si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
				util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
								  sctx->custom_dsa_flush, 1.0f);
				si_blitter_end(&sctx->b.b);
			}

			pipe_surface_reference(&zsurf, NULL);
			pipe_surface_reference(&cbsurf, NULL);
		}

		if (first_layer == 0 && last_layer >= max_layer &&
		    first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
			fully_copied_levels |= 1u << level;
	}

	sctx->dbcb_depth_copy_enabled = false;
	sctx->dbcb_stencil_copy_enabled = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	return fully_copied_levels;
}
Example #12
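/* Clear the bound framebuffer: fast color clear first, then an HTILE fast
 * depth/stencil clear (restricted for TC-compatible HTILE), then the
 * blitter for anything left over. */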
static void si_clear(struct pipe_context *ctx, unsigned buffers,
		     const union pipe_color_union *color,
		     double depth, unsigned stencil)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
	struct pipe_surface *zsbuf = fb->zsbuf;
	struct si_texture *zstex =
		zsbuf ? (struct si_texture*)zsbuf->texture : NULL;

	if (buffers & PIPE_CLEAR_COLOR) {
		si_do_fast_color_clear(sctx, &buffers, color);
		if (!buffers)
			return; /* all buffers have been fast cleared */

		/* These buffers cannot use fast clear, make sure to disable expansion. */
		for (unsigned i = 0; i < fb->nr_cbufs; i++) {
			struct si_texture *tex;

			/* If not clearing this buffer, skip. */
			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)) || !fb->cbufs[i])
				continue;

			tex = (struct si_texture *)fb->cbufs[i]->texture;
			if (tex->surface.fmask_size == 0)
				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
		}
	}

	if (zstex &&
	    si_htile_enabled(zstex, zsbuf->u.tex.level) &&
	    zsbuf->u.tex.first_layer == 0 &&
	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
		if (buffers & PIPE_CLEAR_DEPTH &&
		    (!zstex->tc_compatible_htile ||
		     depth == 0 || depth == 1)) {
			/* Need to disable EXPCLEAR temporarily if clearing
			 * to a new value. */
			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
				sctx->db_depth_disable_expclear = true;
			}

			if (zstex->depth_clear_value != (float)depth) {
				/* Update DB_DEPTH_CLEAR. */
				zstex->depth_clear_value = depth;
				sctx->framebuffer.dirty_zsbuf = true;
				si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
			}
			sctx->db_depth_clear = true;
			si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
		}

		/* TC-compatible HTILE only supports stencil clears to 0. */
		if (buffers & PIPE_CLEAR_STENCIL &&
		    (!zstex->tc_compatible_htile || stencil == 0)) {
			stencil &= 0xff;

			/* Need to disable EXPCLEAR temporarily if clearing
			 * to a new value. */
			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
				sctx->db_stencil_disable_expclear = true;
			}

			if (zstex->stencil_clear_value != (uint8_t)stencil) {
				/* Update DB_STENCIL_CLEAR. */
				zstex->stencil_clear_value = stencil;
				sctx->framebuffer.dirty_zsbuf = true;
				si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
			}
			sctx->db_stencil_clear = true;
			si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
		}

		/* TODO: Find out what's wrong here. Fast depth clear leads to
		 * corruption in ARK: Survival Evolved, but that may just be
		 * a coincidence and the root cause is elsewhere.
		 *
		 * The corruption can be fixed by putting the DB flush before
		 * or after the depth clear. (surprisingly)
		 *
		 * https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
		 *
		 * This hack decreases back-to-back ClearDepth performance.
		 */
		if ((sctx->db_depth_clear || sctx->db_stencil_clear) &&
		    sctx->screen->options.clear_db_cache_before_clear)
			sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
	}

	si_blitter_begin(sctx, SI_CLEAR);
	util_blitter_clear(sctx->blitter, fb->width, fb->height,
			   util_framebuffer_get_num_layers(fb),
			   buffers, color, depth, stencil);
	si_blitter_end(sctx);

	if (sctx->db_depth_clear) {
		sctx->db_depth_clear = false;
		sctx->db_depth_disable_expclear = false;
		zstex->depth_cleared = true;
		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
	}

	if (sctx->db_stencil_clear) {
		sctx->db_stencil_clear = false;
		sctx->db_stencil_disable_expclear = false;
		zstex->stencil_cleared = true;
		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
	}
}
Example #13
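/* Try a DCC or CMASK fast clear for every color buffer being cleared; on
 * success the corresponding bit is removed from *buffers so the caller
 * skips the slow clear for that buffer. */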
static void si_do_fast_color_clear(struct si_context *sctx,
				   unsigned *buffers,
				   const union pipe_color_union *color)
{
	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
	int i;

	/* This function is broken in BE, so just disable this path for now */
#ifdef PIPE_ARCH_BIG_ENDIAN
	return;
#endif

	if (sctx->render_cond)
		return;

	for (i = 0; i < fb->nr_cbufs; i++) {
		struct si_texture *tex;
		unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;

		if (!fb->cbufs[i])
			continue;

		/* if this colorbuffer is not being cleared */
		if (!(*buffers & clear_bit))
			continue;

		unsigned level = fb->cbufs[i]->u.tex.level;
		if (level > 0)
			continue;

		tex = (struct si_texture *)fb->cbufs[i]->texture;

		/* TODO: GFX9: Implement DCC fast clear for level 0 of
		 * mipmapped textures. Mipmapped DCC has to clear a rectangular
		 * area of DCC for level 0 (because the whole miptree is
		 * organized in a 2D plane).
		 */
		if (sctx->chip_class >= GFX9 &&
		    tex->buffer.b.b.last_level > 0)
			continue;

		/* the clear is allowed if all layers are bound */
		if (fb->cbufs[i]->u.tex.first_layer != 0 ||
		    fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->buffer.b.b, 0)) {
			continue;
		}

		/* only supported on tiled surfaces */
		if (tex->surface.is_linear) {
			continue;
		}

		/* shared textures can't use fast clear without an explicit flush,
		 * because there is no way to communicate the clear color among
		 * all clients
		 */
		if (tex->buffer.b.is_shared &&
		    !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
			continue;

		if (sctx->chip_class <= GFX8 &&
		    tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
		    !sctx->screen->info.htile_cmask_support_1d_tiling)
			continue;

		/* Use a slow clear for small surfaces where the cost of
		 * the eliminate pass can be higher than the benefit of fast
		 * clear. The closed driver does this, but the numbers may differ.
		 *
		 * This helps on both dGPUs and APUs, even small APUs like Mullins.
		 */
		bool too_small = tex->buffer.b.b.nr_samples <= 1 &&
				 tex->buffer.b.b.width0 *
				 tex->buffer.b.b.height0 <= 512 * 512;
		bool eliminate_needed = false;
		bool fmask_decompress_needed = false;

		/* Fast clear is the most appropriate place to enable DCC for
		 * displayable surfaces.
		 */
		if (sctx->family == CHIP_STONEY && !too_small) {
			vi_separate_dcc_try_enable(sctx, tex);

			/* RB+ isn't supported with a CMASK clear only on Stoney,
			 * so all clears are considered to be hypothetically slow
			 * clears, which is weighed when determining whether to
			 * enable separate DCC.
			 */
			if (tex->dcc_gather_statistics) /* only for Stoney */
				tex->num_slow_clears++;
		}

		/* Try to clear DCC first, otherwise try CMASK. */
		if (vi_dcc_enabled(tex, 0)) {
			uint32_t reset_value;

			if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
				continue;

			/* This can happen with mipmapping or MSAA. */
			if (sctx->chip_class == GFX8 &&
			    !tex->surface.u.legacy.level[level].dcc_fast_clear_size)
				continue;

			if (!vi_get_fast_clear_parameters(tex->buffer.b.b.format,
							  fb->cbufs[i]->format,
							  color, &reset_value,
							  &eliminate_needed))
				continue;

			if (eliminate_needed && too_small)
				continue;

			/* DCC fast clear with MSAA should clear CMASK to 0xC. */
			if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) {
				/* TODO: This doesn't work with MSAA. */
				if (eliminate_needed)
					continue;

				uint32_t clear_value = 0xCCCCCCCC;
				si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
						tex->cmask_offset, tex->surface.cmask_size,
						&clear_value, 4, SI_COHERENCY_CB_META, false);
				fmask_decompress_needed = true;
			}

			vi_dcc_clear_level(sctx, tex, 0, reset_value);
			tex->separate_dcc_dirty = true;
		} else {
			if (too_small)
				continue;

			/* 128-bit formats are unsupported */
			if (tex->surface.bpe > 8) {
				continue;
			}

			/* RB+ doesn't work with CMASK fast clear on Stoney. */
			if (sctx->family == CHIP_STONEY)
				continue;

			/* ensure CMASK is enabled */
			si_alloc_separate_cmask(sctx->screen, tex);
			if (!tex->cmask_buffer)
				continue;

			/* Do the fast clear. */
			uint32_t clear_value = 0;
			si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
					tex->cmask_offset, tex->surface.cmask_size,
					&clear_value, 4, SI_COHERENCY_CB_META, false);
			eliminate_needed = true;
		}

		if ((eliminate_needed || fmask_decompress_needed) &&
		    !(tex->dirty_level_mask & (1 << level))) {
			tex->dirty_level_mask |= 1 << level;
			p_atomic_inc(&sctx->screen->compressed_colortex_counter);
		}

		/* We can change the micro tile mode before a full clear. */
		si_set_optimal_micro_tile_mode(sctx->screen, tex);

		*buffers &= ~clear_bit;

		/* Chips with DCC constant encoding don't need to set the clear
		 * color registers for DCC clear values 0 and 1.
		 */
		if (sctx->screen->has_dcc_constant_encode && !eliminate_needed)
			continue;

		if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) {
			sctx->framebuffer.dirty_cbufs |= 1 << i;
			si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
		}
	}
}