Example #1
static void si_blit_decompress_color(struct pipe_context *ctx,
		struct r600_texture *rtex,
		unsigned first_level, unsigned last_level,
		unsigned first_layer, unsigned last_layer,
		bool need_dcc_decompress)
{
	struct si_context *sctx = (struct si_context *)ctx;
	void* custom_blend;
	unsigned layer, checked_last_layer, max_layer;
	unsigned level_mask =
		u_bit_consecutive(first_level, last_level - first_level + 1);

	if (!need_dcc_decompress)
		level_mask &= rtex->dirty_level_mask;
	if (!level_mask)
		return;

	if (rtex->dcc_offset && need_dcc_decompress) {
		custom_blend = sctx->custom_blend_dcc_decompress;
	} else if (rtex->fmask.size) {
		custom_blend = sctx->custom_blend_decompress;
	} else {
		custom_blend = sctx->custom_blend_fastclear;
	}

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		/* The smaller the mipmap level, the fewer layers there are,
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&rtex->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			struct pipe_surface *cbsurf, surf_tmpl;

			surf_tmpl.format = rtex->resource.b.b.format;
			surf_tmpl.u.tex.level = level;
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;
			cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);

			si_blitter_begin(ctx, SI_DECOMPRESS);
			util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
			si_blitter_end(ctx);

			pipe_surface_reference(&cbsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer == max_layer) {
			rtex->dirty_level_mask &= ~(1 << level);
		}
	}
}
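The level-range mask built by u_bit_consecutive is consumed with u_bit_scan, which peels off one set bit per iteration. A standalone sketch of that idiom, assuming a GCC/Clang __builtin_ffs (Mesa's u_bit_scan behaves the same way); this is an illustrative reimplementation, not the Mesa source:

#include <stdio.h>

static int bit_scan(unsigned *mask)
{
	int i = __builtin_ffs(*mask) - 1; /* lowest set bit, 0-based */
	*mask &= *mask - 1;               /* clear that bit */
	return i;
}

int main(void)
{
	/* u_bit_consecutive(2, 4) == 0x3c: levels 2..5 */
	unsigned level_mask = 0x3c;

	while (level_mask)
		printf("decompress level %d\n", bit_scan(&level_mask)); /* 2, 3, 4, 5 */
	return 0;
}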
Example #2
/* Decompress Z and/or S planes in place, depending on mask.
 */
static void
si_blit_decompress_zs_in_place(struct si_context *sctx,
			       struct r600_texture *texture,
			       unsigned planes,
			       unsigned first_level, unsigned last_level,
			       unsigned first_layer, unsigned last_layer)
{
	unsigned level_mask =
		u_bit_consecutive(first_level, last_level - first_level + 1);
	unsigned cur_level_mask;

	/* First, do combined Z & S decompresses for levels that need it. */
	if (planes == (PIPE_MASK_Z | PIPE_MASK_S)) {
		cur_level_mask =
			level_mask &
			texture->dirty_level_mask &
			texture->stencil_dirty_level_mask;
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_Z | PIPE_MASK_S,
				cur_level_mask,
				first_layer, last_layer);
		level_mask &= ~cur_level_mask;
	}

	/* Now do separate Z and S decompresses. */
	if (planes & PIPE_MASK_Z) {
		cur_level_mask = level_mask & texture->dirty_level_mask;
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_Z,
				cur_level_mask,
				first_layer, last_layer);
		level_mask &= ~cur_level_mask;
	}

	if (planes & PIPE_MASK_S) {
		cur_level_mask = level_mask & texture->stencil_dirty_level_mask;
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_S,
				cur_level_mask,
				first_layer, last_layer);
	}
}
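The pattern worth noting here is the mask subtraction: levels handled by the combined Z+S pass are removed from level_mask, so the later single-plane passes only see the remainder. A worked example with hypothetical dirty masks (not values from the driver):

#include <stdio.h>

int main(void)
{
	unsigned level_mask = 0x3f; /* levels 0..5 requested */
	unsigned dirty_z    = 0x2b; /* levels 0,1,3,5 dirty in Z */
	unsigned dirty_s    = 0x1a; /* levels 1,3,4 dirty in S */

	unsigned both = level_mask & dirty_z & dirty_s; /* 0x0a: levels 1,3 */
	level_mask &= ~both;

	unsigned z_only = level_mask & dirty_z;         /* 0x21: levels 0,5 */
	level_mask &= ~z_only;

	unsigned s_only = level_mask & dirty_s;         /* 0x10: level 4 */

	printf("both=%#x z=%#x s=%#x\n", both, z_only, s_only);
	return 0;
}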
Example #3
static void si_blit_decompress_depth(struct pipe_context *ctx,
				     struct r600_texture *texture,
				     struct r600_texture *staging,
				     unsigned first_level, unsigned last_level,
				     unsigned first_layer, unsigned last_layer,
				     unsigned first_sample, unsigned last_sample)
{
	const struct util_format_description *desc;
	unsigned planes = 0;

	assert(staging != NULL && "use si_blit_decompress_zs_in_place instead");

	desc = util_format_description(staging->resource.b.b.format);

	if (util_format_has_depth(desc))
		planes |= PIPE_MASK_Z;
	if (util_format_has_stencil(desc))
		planes |= PIPE_MASK_S;

	si_blit_dbcb_copy(
		(struct si_context *)ctx, texture, staging, planes,
		u_bit_consecutive(first_level, last_level - first_level + 1),
		first_layer, last_layer, first_sample, last_sample);
}
Example #4
/* Create a compute shader implementing clear_buffer or copy_buffer. */
void *si_create_dma_compute_shader(struct pipe_context *ctx,
				   unsigned num_dwords_per_thread,
				   bool dst_stream_cache_policy, bool is_copy)
{
	assert(util_is_power_of_two_nonzero(num_dwords_per_thread));

	unsigned store_qualifier = TGSI_MEMORY_COHERENT | TGSI_MEMORY_RESTRICT;
	if (dst_stream_cache_policy)
		store_qualifier |= TGSI_MEMORY_STREAM_CACHE_POLICY;

	/* Don't cache loads, because there is no reuse. */
	unsigned load_qualifier = store_qualifier | TGSI_MEMORY_STREAM_CACHE_POLICY;

	unsigned num_mem_ops = MAX2(1, num_dwords_per_thread / 4);
	unsigned *inst_dwords = alloca(num_mem_ops * sizeof(unsigned));

	for (unsigned i = 0; i < num_mem_ops; i++) {
		if (i*4 < num_dwords_per_thread)
			inst_dwords[i] = MIN2(4, num_dwords_per_thread - i*4);
	}

	struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE);
	if (!ureg)
		return NULL;

	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 64);
	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1);
	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1);

	struct ureg_src value;
	if (!is_copy) {
		ureg_property(ureg, TGSI_PROPERTY_CS_USER_DATA_DWORDS, inst_dwords[0]);
		value = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_CS_USER_DATA, 0);
	}

	struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0);
	struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0);
	struct ureg_dst store_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
	struct ureg_dst load_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
	struct ureg_dst dstbuf = ureg_dst(ureg_DECL_buffer(ureg, 0, false));
	struct ureg_src srcbuf;
	struct ureg_src *values = NULL;

	if (is_copy) {
		srcbuf = ureg_DECL_buffer(ureg, 1, false);
		values = malloc(num_mem_ops * sizeof(struct ureg_src));
	}

	/* If there are multiple stores, the first store writes into 0+tid,
	 * the 2nd store writes into 64+tid, the 3rd store writes into 128+tid, etc.
	 */
	ureg_UMAD(ureg, store_addr, blk, ureg_imm1u(ureg, 64 * num_mem_ops), tid);
	/* Convert from a "store size unit" into bytes. */
	ureg_UMUL(ureg, store_addr, ureg_src(store_addr),
		  ureg_imm1u(ureg, 4 * inst_dwords[0]));
	ureg_MOV(ureg, load_addr, ureg_src(store_addr));

	/* Distance between a load and a store for latency hiding. */
	unsigned load_store_distance = is_copy ? 8 : 0;

	for (unsigned i = 0; i < num_mem_ops + load_store_distance; i++) {
		int d = i - load_store_distance;

		if (is_copy && i < num_mem_ops) {
			if (i) {
				ureg_UADD(ureg, load_addr, ureg_src(load_addr),
					  ureg_imm1u(ureg, 4 * inst_dwords[i] * 64));
			}

			values[i] = ureg_src(ureg_DECL_temporary(ureg));
			struct ureg_dst dst =
				ureg_writemask(ureg_dst(values[i]),
					       u_bit_consecutive(0, inst_dwords[i]));
			struct ureg_src srcs[] = {srcbuf, ureg_src(load_addr)};
			ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dst, 1, srcs, 2,
					 load_qualifier, TGSI_TEXTURE_BUFFER, 0);
		}

		if (d >= 0) {
			if (d) {
				ureg_UADD(ureg, store_addr, ureg_src(store_addr),
					  ureg_imm1u(ureg, 4 * inst_dwords[d] * 64));
			}

			struct ureg_dst dst =
				ureg_writemask(dstbuf, u_bit_consecutive(0, inst_dwords[d]));
			struct ureg_src srcs[] =
				{ureg_src(store_addr), is_copy ? values[d] : value};
			ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst, 1, srcs, 2,
					 store_qualifier, TGSI_TEXTURE_BUFFER, 0);
		}
	}
	ureg_END(ureg);

	struct pipe_compute_state state = {};
	state.ir_type = PIPE_SHADER_IR_TGSI;
	state.prog = ureg_get_tokens(ureg, NULL);

	void *cs = ctx->create_compute_state(ctx, &state);
	ureg_destroy(ureg);
	free(values);
	return cs;
}
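The index arithmetic in the main loop implements simple software pipelining: loads run load_store_distance iterations ahead of the stores that consume them. An illustrative trace of that schedule (hypothetical sizes, not driver code):

#include <stdio.h>

int main(void)
{
	unsigned num_mem_ops = 4, distance = 8;

	for (unsigned i = 0; i < num_mem_ops + distance; i++) {
		int d = (int)i - (int)distance;
		if (i < num_mem_ops)
			printf("iter %u: issue load  %u\n", i, i);
		if (d >= 0)
			printf("iter %u: issue store %d\n", i, d);
	}
	/* loads go out on iterations 0..3, stores on 8..11, so all
	 * loads are in flight before the first store needs a result */
	return 0;
}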
Example #5
void si_pm4_reset_emitted(struct si_context *sctx)
{
	memset(&sctx->emitted, 0, sizeof(sctx->emitted));
	sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES);
}
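For reference, a minimal sketch of what u_bit_consecutive computes, matching the util/u_math.h semantics (an illustrative reimplementation, not the Mesa source): a mask of count consecutive bits starting at bit start. The call above therefore flags all SI_NUM_STATES state slots as dirty at once.

#include <assert.h>

static unsigned bit_consecutive(unsigned start, unsigned count)
{
	assert(start + count <= 32);
	if (count == 32)        /* avoid the undefined shift 1u << 32 */
		return ~0u;
	return ((1u << count) - 1) << start;
}

int main(void)
{
	assert(bit_consecutive(0, 3) == 0x7);  /* bits 0..2 */
	assert(bit_consecutive(2, 4) == 0x3c); /* bits 2..5 */
	return 0;
}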
Example #6
static void
si_flush_depth_texture(struct si_context *sctx,
		       struct r600_texture *tex,
		       unsigned required_planes,
		       unsigned first_level, unsigned last_level,
		       unsigned first_layer, unsigned last_layer)
{
	unsigned inplace_planes = 0;
	unsigned copy_planes = 0;
	unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1);
	unsigned levels_z = 0;
	unsigned levels_s = 0;

	if (required_planes & PIPE_MASK_Z) {
		levels_z = level_mask & tex->dirty_level_mask;

		if (levels_z) {
			if (r600_can_sample_zs(tex, false))
				inplace_planes |= PIPE_MASK_Z;
			else
				copy_planes |= PIPE_MASK_Z;
		}
	}
	if (required_planes & PIPE_MASK_S) {
		levels_s = level_mask & tex->stencil_dirty_level_mask;

		if (levels_s) {
			if (r600_can_sample_zs(tex, true))
				inplace_planes |= PIPE_MASK_S;
			else
				copy_planes |= PIPE_MASK_S;
		}
	}

	/* We may have to allocate the flushed texture here when called from
	 * si_decompress_subresource.
	 */
	if (copy_planes &&
	    (tex->flushed_depth_texture ||
	     r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
		struct r600_texture *dst = tex->flushed_depth_texture;
		unsigned fully_copied_levels;
		unsigned levels = 0;

		assert(tex->flushed_depth_texture);

		if (util_format_is_depth_and_stencil(dst->resource.b.b.format))
			copy_planes = PIPE_MASK_Z | PIPE_MASK_S;

		if (copy_planes & PIPE_MASK_Z) {
			levels |= levels_z;
			levels_z = 0;
		}
		if (copy_planes & PIPE_MASK_S) {
			levels |= levels_s;
			levels_s = 0;
		}

		fully_copied_levels = si_blit_dbcb_copy(
			sctx, tex, dst, copy_planes, levels,
			first_layer, last_layer,
			0, u_max_sample(&tex->resource.b.b));

		if (copy_planes & PIPE_MASK_Z)
			tex->dirty_level_mask &= ~fully_copied_levels;
		if (copy_planes & PIPE_MASK_S)
			tex->stencil_dirty_level_mask &= ~fully_copied_levels;
	}

	if (inplace_planes) {
		si_blit_decompress_zs_in_place(
			sctx, tex,
			levels_z, levels_s,
			first_layer, last_layer);
	}
}
Example #7
/* Too complex to figure out, just check every time:
 */
static void check_program_state( struct st_context *st )
{
   struct gl_context *ctx = st->ctx;
   struct st_vertex_program *old_vp = st->vp;
   struct st_common_program *old_tcp = st->tcp;
   struct st_common_program *old_tep = st->tep;
   struct st_common_program *old_gp = st->gp;
   struct st_fragment_program *old_fp = st->fp;

   struct gl_program *new_vp = ctx->VertexProgram._Current;
   struct gl_program *new_tcp = ctx->TessCtrlProgram._Current;
   struct gl_program *new_tep = ctx->TessEvalProgram._Current;
   struct gl_program *new_gp = ctx->GeometryProgram._Current;
   struct gl_program *new_fp = ctx->FragmentProgram._Current;
   uint64_t dirty = 0;
   unsigned num_viewports = 1;

   /* Flag states used by both new and old shaders to unbind shader resources
    * properly when transitioning to shaders that don't use them.
    */
   if (unlikely(new_vp != &old_vp->Base)) {
      if (old_vp)
         dirty |= old_vp->affected_states;
      if (new_vp)
         dirty |= ST_NEW_VERTEX_PROGRAM(st, st_vertex_program(new_vp));
   }

   if (unlikely(new_tcp != &old_tcp->Base)) {
      if (old_tcp)
         dirty |= old_tcp->affected_states;
      if (new_tcp)
         dirty |= st_common_program(new_tcp)->affected_states;
   }

   if (unlikely(new_tep != &old_tep->Base)) {
      if (old_tep)
         dirty |= old_tep->affected_states;
      if (new_tep)
         dirty |= st_common_program(new_tep)->affected_states;
   }

   if (unlikely(new_gp != &old_gp->Base)) {
      if (old_gp)
         dirty |= old_gp->affected_states;
      if (new_gp)
         dirty |= st_common_program(new_gp)->affected_states;
   }

   if (unlikely(new_fp != &old_fp->Base)) {
      if (old_fp)
         dirty |= old_fp->affected_states;
      if (new_fp)
         dirty |= st_fragment_program(new_fp)->affected_states;
   }

   /* Find out the number of viewports. This determines how many scissors
    * and viewport states we need to update.
    */
   struct gl_program *last_prim_shader = new_gp ? new_gp :
                                         new_tep ? new_tep : new_vp;
   if (last_prim_shader &&
       last_prim_shader->info.outputs_written & VARYING_BIT_VIEWPORT)
      num_viewports = ctx->Const.MaxViewports;

   if (st->state.num_viewports != num_viewports) {
      st->state.num_viewports = num_viewports;
      dirty |= ST_NEW_VIEWPORT;

      if (ctx->Scissor.EnableFlags & u_bit_consecutive(0, num_viewports))
         dirty |= ST_NEW_SCISSOR;
   }

   st->dirty |= dirty;
}
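Here u_bit_consecutive(0, num_viewports) is simply the mask of the num_viewports lowest bits, so the AND is nonzero exactly when one of the first num_viewports scissor-enable bits is set. A small sketch with hypothetical flag values:

#include <stdio.h>

int main(void)
{
	unsigned enable_flags  = 0x5; /* scissors 0 and 2 enabled */
	unsigned num_viewports = 2;
	unsigned mask = (num_viewports == 32) ? ~0u
	                : ((1u << num_viewports) - 1); /* 0x3 */

	if (enable_flags & mask)
		printf("at least one of the first %u scissors is on\n",
		       num_viewports); /* taken: bit 0 is set */
	return 0;
}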
Example #8
static bool vi_get_fast_clear_parameters(enum pipe_format base_format,
					 enum pipe_format surface_format,
					 const union pipe_color_union *color,
					 uint32_t* clear_value,
					 bool *eliminate_needed)
{
	/* If we want to clear without needing a fast clear eliminate step, we
	 * can set color and alpha independently to 0 or 1 (or 0/max for integer
	 * formats).
	 */
	bool values[4] = {}; /* whether to clear to 0 or 1 */
	bool color_value = false; /* clear color to 0 or 1 */
	bool alpha_value = false; /* clear alpha to 0 or 1 */
	int alpha_channel; /* index of the alpha component */
	bool has_color = false;
	bool has_alpha = false;

	const struct util_format_description *desc =
		util_format_description(si_simplify_cb_format(surface_format));

	/* 128-bit fast clear with different R,G,B values is unsupported. */
	if (desc->block.bits == 128 &&
	    (color->ui[0] != color->ui[1] ||
	     color->ui[0] != color->ui[2]))
		return false;

	*eliminate_needed = true;
	*clear_value = DCC_CLEAR_COLOR_REG;

	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return true; /* need ELIMINATE_FAST_CLEAR */

	bool base_alpha_is_on_msb = vi_alpha_is_on_msb(base_format);
	bool surf_alpha_is_on_msb = vi_alpha_is_on_msb(surface_format);

	/* Formats with 3 channels can't have alpha. */
	if (desc->nr_channels == 3)
		alpha_channel = -1;
	else if (surf_alpha_is_on_msb)
		alpha_channel = desc->nr_channels - 1;
	else
		alpha_channel = 0;

	for (int i = 0; i < 4; ++i) {
		if (desc->swizzle[i] >= PIPE_SWIZZLE_0)
			continue;

		if (desc->channel[i].pure_integer &&
		    desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
			/* Use the maximum value for clamping the clear color. */
			int max = u_bit_consecutive(0, desc->channel[i].size - 1);

			values[i] = color->i[i] != 0;
			if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
				return true; /* need ELIMINATE_FAST_CLEAR */
		} else if (desc->channel[i].pure_integer &&
			   desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			/* Use the maximum value for clamping the clear color. */
			unsigned max = u_bit_consecutive(0, desc->channel[i].size);

			values[i] = color->ui[i] != 0U;
			if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
				return true; /* need ELIMINATE_FAST_CLEAR */
		} else {
			values[i] = color->f[i] != 0.0F;
			if (color->f[i] != 0.0F && color->f[i] != 1.0F)
				return true; /* need ELIMINATE_FAST_CLEAR */
		}

		if (desc->swizzle[i] == alpha_channel) {
			alpha_value = values[i];
			has_alpha = true;
		} else {
			color_value = values[i];
			has_color = true;
		}
	}

	/* If alpha isn't present, make it the same as color, and vice versa. */
	if (!has_alpha)
		alpha_value = color_value;
	else if (!has_color)
		color_value = alpha_value;

	if (color_value != alpha_value &&
	    base_alpha_is_on_msb != surf_alpha_is_on_msb)
		return true; /* require ELIMINATE_FAST_CLEAR */

	/* Check if all color values are equal if they are present. */
	for (int i = 0; i < 4; ++i) {
		if (desc->swizzle[i] <= PIPE_SWIZZLE_W &&
		    desc->swizzle[i] != alpha_channel &&
		    values[i] != color_value)
			return true; /* require ELIMINATE_FAST_CLEAR */
	}

	/* This doesn't need ELIMINATE_FAST_CLEAR.
	 * On chips predating Raven2, the DCC clear codes and the CB clear
	 * color registers must match.
	 */
	*eliminate_needed = false;

	if (color_value) {
		if (alpha_value)
			*clear_value = DCC_CLEAR_COLOR_1111;
		else
			*clear_value = DCC_CLEAR_COLOR_1110;
	} else {
		if (alpha_value)
			*clear_value = DCC_CLEAR_COLOR_0001;
		else
			*clear_value = DCC_CLEAR_COLOR_0000;
	}
	return true;
}
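The two u_bit_consecutive calls above compute the channel's clamp limits: the lowest size - 1 bits form the largest signed value, and size bits the largest unsigned one. A worked check for an 8-bit channel, reimplementing the helper for illustration:

#include <assert.h>

static unsigned bit_consecutive(unsigned start, unsigned count)
{
	return count == 32 ? ~0u : ((1u << count) - 1) << start;
}

int main(void)
{
	unsigned size = 8;                           /* channel width in bits */
	assert(bit_consecutive(0, size - 1) == 127); /* INT8_MAX:  0x7f */
	assert(bit_consecutive(0, size) == 255);     /* UINT8_MAX: 0xff */
	return 0;
}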