Beispiel #1
0
void *
util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                   enum tgsi_texture_type tgsi_tex,
                                   unsigned nr_samples,
                                   enum tgsi_return_type stype)
{
   struct ureg_program *ureg;
   struct ureg_src sampler, coord;
   struct ureg_dst out, tmp, top, bottom;
   struct ureg_dst tmp_coord[4], tmp_sum[4];
   unsigned i, c;

   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!ureg)
      return NULL;

   /* Declarations. */
   sampler = ureg_DECL_sampler(ureg, 0);
   ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype);
   coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
                              TGSI_INTERPOLATE_LINEAR);
   out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   for (c = 0; c < 4; c++)
      tmp_sum[c] = ureg_DECL_temporary(ureg);
   for (c = 0; c < 4; c++)
      tmp_coord[c] = ureg_DECL_temporary(ureg);
   tmp = ureg_DECL_temporary(ureg);
   top = ureg_DECL_temporary(ureg);
   bottom = ureg_DECL_temporary(ureg);

   /* Instructions. */
   for (c = 0; c < 4; c++)
      ureg_MOV(ureg, tmp_sum[c], ureg_imm1f(ureg, 0));

   /* Get 4 texture coordinates for the bilinear filter. */
   ureg_F2U(ureg, tmp_coord[0], coord); /* top-left */
   ureg_UADD(ureg, tmp_coord[1], ureg_src(tmp_coord[0]),
             ureg_imm4u(ureg, 1, 0, 0, 0)); /* top-right */
   ureg_UADD(ureg, tmp_coord[2], ureg_src(tmp_coord[0]),
             ureg_imm4u(ureg, 0, 1, 0, 0)); /* bottom-left */
   ureg_UADD(ureg, tmp_coord[3], ureg_src(tmp_coord[0]),
             ureg_imm4u(ureg, 1, 1, 0, 0)); /* bottom-right */

   for (i = 0; i < nr_samples; i++) {
      for (c = 0; c < 4; c++) {
         /* Read one sample. */
         ureg_MOV(ureg, ureg_writemask(tmp_coord[c], TGSI_WRITEMASK_W),
                  ureg_imm1u(ureg, i));
         ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord[c]), sampler);

         if (stype == TGSI_RETURN_TYPE_UINT)
            ureg_U2F(ureg, tmp, ureg_src(tmp));
         else if (stype == TGSI_RETURN_TYPE_SINT)
            ureg_I2F(ureg, tmp, ureg_src(tmp));

         /* Add it to the sum.*/
         ureg_ADD(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), ureg_src(tmp));
      }
   }

   /* Calculate the average. */
   for (c = 0; c < 4; c++)
      ureg_MUL(ureg, tmp_sum[c], ureg_src(tmp_sum[c]),
               ureg_imm1f(ureg, 1.0 / nr_samples));

   /* Take the 4 average values and apply a standard bilinear filter. */
   ureg_FRC(ureg, tmp, coord);

   ureg_LRP(ureg, top,
            ureg_scalar(ureg_src(tmp), 0),
            ureg_src(tmp_sum[1]),
            ureg_src(tmp_sum[0]));

   ureg_LRP(ureg, bottom,
            ureg_scalar(ureg_src(tmp), 0),
            ureg_src(tmp_sum[3]),
            ureg_src(tmp_sum[2]));

   ureg_LRP(ureg, tmp,
            ureg_scalar(ureg_src(tmp), 1),
            ureg_src(bottom),
            ureg_src(top));

   /* Convert to the texture format and return. */
   if (stype == TGSI_RETURN_TYPE_UINT)
      ureg_F2U(ureg, out, ureg_src(tmp));
   else if (stype == TGSI_RETURN_TYPE_SINT)
      ureg_F2I(ureg, out, ureg_src(tmp));
   else
      ureg_MOV(ureg, out, ureg_src(tmp));

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
Beispiel #2
0
/**
 * Create a compute shader implementing clear_buffer or copy_buffer.
 *
 * The shader uses a fixed 64x1x1 block. Every thread performs
 * num_mem_ops = MAX2(1, num_dwords_per_thread / 4) memory operations of at
 * most 4 dwords each. The first operation of a thread addresses
 * (block_id * 64 * num_mem_ops + thread_id) * 4 * inst_dwords[0] bytes and
 * each following operation advances by 4 * inst_dwords[i] * 64 bytes.
 *
 * \param ctx                      context whose create_compute_state() hook
 *                                 is used to create the shader
 * \param num_dwords_per_thread    dwords handled by each thread; must be a
 *                                 nonzero power of two (asserted)
 * \param dst_stream_cache_policy  when true, stores also carry
 *                                 TGSI_MEMORY_STREAM_CACHE_POLICY
 * \param is_copy                  true builds the copy shader (buffer 1 is
 *                                 the source, buffer 0 the destination);
 *                                 false builds the clear shader, which
 *                                 stores the CS user data value to buffer 0
 * \return the value returned by ctx->create_compute_state(), or NULL when
 *         the ureg program or the temporary value array cannot be allocated
 */
void *si_create_dma_compute_shader(struct pipe_context *ctx,
				   unsigned num_dwords_per_thread,
				   bool dst_stream_cache_policy, bool is_copy)
{
	assert(util_is_power_of_two_nonzero(num_dwords_per_thread));

	unsigned store_qualifier = TGSI_MEMORY_COHERENT | TGSI_MEMORY_RESTRICT;
	if (dst_stream_cache_policy)
		store_qualifier |= TGSI_MEMORY_STREAM_CACHE_POLICY;

	/* Don't cache loads, because there is no reuse. */
	unsigned load_qualifier = store_qualifier | TGSI_MEMORY_STREAM_CACHE_POLICY;

	/* Number of dwords moved by each individual load/store (at most 4). */
	unsigned num_mem_ops = MAX2(1, num_dwords_per_thread / 4);
	unsigned *inst_dwords = alloca(num_mem_ops * sizeof(unsigned));

	for (unsigned i = 0; i < num_mem_ops; i++) {
		if (i*4 < num_dwords_per_thread)
			inst_dwords[i] = MIN2(4, num_dwords_per_thread - i*4);
	}

	struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE);
	if (!ureg)
		return NULL;

	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 64);
	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1);
	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1);

	/* The clear value comes from CS user data; only set (and only read)
	 * when building the clear shader.
	 */
	struct ureg_src value;
	if (!is_copy) {
		ureg_property(ureg, TGSI_PROPERTY_CS_USER_DATA_DWORDS, inst_dwords[0]);
		value = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_CS_USER_DATA, 0);
	}

	struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0);
	struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0);
	struct ureg_dst store_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
	struct ureg_dst load_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
	struct ureg_dst dstbuf = ureg_dst(ureg_DECL_buffer(ureg, 0, false));
	struct ureg_src srcbuf;
	struct ureg_src *values = NULL;

	if (is_copy) {
		srcbuf = ureg_DECL_buffer(ureg, 1, false);
		values = malloc(num_mem_ops * sizeof(struct ureg_src));
		if (!values) {
			/* Out of memory: values[] is written below, so bail out
			 * instead of dereferencing NULL.
			 */
			ureg_destroy(ureg);
			return NULL;
		}
	}

	/* If there are multiple stores, the first store writes into 0+tid,
	 * the 2nd store writes into 64+tid, the 3rd store writes into 128+tid, etc.
	 */
	ureg_UMAD(ureg, store_addr, blk, ureg_imm1u(ureg, 64 * num_mem_ops), tid);
	/* Convert from a "store size unit" into bytes. */
	ureg_UMUL(ureg, store_addr, ureg_src(store_addr),
		  ureg_imm1u(ureg, 4 * inst_dwords[0]));
	ureg_MOV(ureg, load_addr, ureg_src(store_addr));

	/* Distance between a load and a store for latency hiding. */
	unsigned load_store_distance = is_copy ? 8 : 0;

	/* Iteration i emits load #i (copy only) and store #d, where
	 * d = i - load_store_distance; d is negative while only loads are
	 * being issued.
	 */
	for (unsigned i = 0; i < num_mem_ops + load_store_distance; i++) {
		int d = i - load_store_distance;

		if (is_copy && i < num_mem_ops) {
			if (i) {
				ureg_UADD(ureg, load_addr, ureg_src(load_addr),
					  ureg_imm1u(ureg, 4 * inst_dwords[i] * 64));
			}

			/* Each load gets its own temporary so that it stays
			 * valid until the matching store is emitted.
			 */
			values[i] = ureg_src(ureg_DECL_temporary(ureg));
			struct ureg_dst dst =
				ureg_writemask(ureg_dst(values[i]),
					       u_bit_consecutive(0, inst_dwords[i]));
			struct ureg_src srcs[] = {srcbuf, ureg_src(load_addr)};
			ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dst, 1, srcs, 2,
					 load_qualifier, TGSI_TEXTURE_BUFFER, 0);
		}

		if (d >= 0) {
			if (d) {
				ureg_UADD(ureg, store_addr, ureg_src(store_addr),
					  ureg_imm1u(ureg, 4 * inst_dwords[d] * 64));
			}

			struct ureg_dst dst =
				ureg_writemask(dstbuf, u_bit_consecutive(0, inst_dwords[d]));
			struct ureg_src srcs[] =
				{ureg_src(store_addr), is_copy ? values[d] : value};
			ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst, 1, srcs, 2,
					 store_qualifier, TGSI_TEXTURE_BUFFER, 0);
		}
	}
	ureg_END(ureg);

	/* NOTE(review): the token array returned by ureg_get_tokens() is not
	 * released anywhere in this function; confirm whether
	 * create_compute_state() takes ownership of it or whether it has to
	 * be freed here after the call.
	 */
	struct pipe_compute_state state = {};
	state.ir_type = PIPE_SHADER_IR_TGSI;
	state.prog = ureg_get_tokens(ureg, NULL);

	void *cs = ctx->create_compute_state(ctx, &state);
	ureg_destroy(ureg);
	free(values);
	return cs;
}
Beispiel #3
0
static void *
create_fs(struct st_context *st, bool download, enum pipe_texture_target target)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct ureg_program *ureg;
   bool have_layer;
   struct ureg_dst out;
   struct ureg_src sampler;
   struct ureg_src pos;
   struct ureg_src layer;
   struct ureg_src const0;
   struct ureg_src const1;
   struct ureg_dst temp0;

   have_layer =
      st->pbo.layers &&
      (!download || target == PIPE_TEXTURE_1D_ARRAY
                 || target == PIPE_TEXTURE_2D_ARRAY
                 || target == PIPE_TEXTURE_3D
                 || target == PIPE_TEXTURE_CUBE
                 || target == PIPE_TEXTURE_CUBE_ARRAY);

   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!ureg)
      return NULL;

   if (!download) {
      out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   } else {
      struct ureg_src image;

      /* writeonly images do not require an explicitly given format. */
      image = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE,
                                    true, false);
      out = ureg_dst(image);
   }

   sampler = ureg_DECL_sampler(ureg, 0);
   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
      pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
   } else {
      pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
                               TGSI_INTERPOLATE_LINEAR);
   }
   if (have_layer) {
      layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
                                       TGSI_INTERPOLATE_CONSTANT);
   }
   const0  = ureg_DECL_constant(ureg, 0);
   const1  = ureg_DECL_constant(ureg, 1);
   temp0   = ureg_DECL_temporary(ureg);

   /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */

   /* temp0.xy = f2i(temp0.xy) */
   ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
                  ureg_swizzle(pos,
                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));

   /* temp0.xy = temp0.xy + const0.xy */
   ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
                   ureg_swizzle(ureg_src(temp0),
                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
                   ureg_swizzle(const0,
                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));

   /* temp0.x = const0.z * temp0.y + temp0.x */
   ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
                   ureg_scalar(const0, TGSI_SWIZZLE_Z),
                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));

   if (have_layer) {
      /* temp0.x = const0.w * layer + temp0.x */
      ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
                      ureg_scalar(const0, TGSI_SWIZZLE_W),
                      ureg_scalar(layer, TGSI_SWIZZLE_X),
                      ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
   }

   /* temp0.w = 0 */
   ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0));

   if (download) {
      struct ureg_dst temp1;
      struct ureg_src op[2];

      temp1 = ureg_DECL_temporary(ureg);

      /* temp1.xy = pos.xy */
      ureg_F2I(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_XY), pos);

      /* temp1.zw = 0 */
      ureg_MOV(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_ZW), ureg_imm1u(ureg, 0));

      if (have_layer) {
         struct ureg_dst temp1_layer =
            ureg_writemask(temp1, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y
                                                                  : TGSI_WRITEMASK_Z);

         /* temp1.y/z = layer */
         ureg_MOV(ureg, temp1_layer, ureg_scalar(layer, TGSI_SWIZZLE_X));

         if (target == PIPE_TEXTURE_3D) {
            /* temp1.z += layer_offset */
            ureg_UADD(ureg, temp1_layer,
                            ureg_scalar(ureg_src(temp1), TGSI_SWIZZLE_Z),
                            ureg_scalar(const1, TGSI_SWIZZLE_X));
         }
      }

      /* temp1 = txf(sampler, temp1) */
      ureg_TXF(ureg, temp1, util_pipe_tex_to_tgsi_tex(target, 1),
                     ureg_src(temp1), sampler);

      /* store(out, temp0, temp1) */
      op[0] = ureg_src(temp0);
      op[1] = ureg_src(temp1);
      ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &out, 1, op, 2, 0,
                             TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE);

      ureg_release_temporary(ureg, temp1);
   } else {
      /* out = txf(sampler, temp0.x) */
      ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler);
   }

   ureg_release_temporary(ureg, temp0);

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}