예제 #1
0
static void
lower_load_store(nir_builder *b,
                 nir_intrinsic_instr *intrin,
                 glsl_type_size_align_func size_align)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   nir_ssa_def *offset =
      nir_iadd_imm(b, nir_build_deref_offset(b, deref, size_align),
                      var->data.location);

   unsigned align, UNUSED size;
   size_align(deref->type, &size, &align);

   if (intrin->intrinsic == nir_intrinsic_load_deref) {
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_scratch);
      load->num_components = intrin->num_components;
      load->src[0] = nir_src_for_ssa(offset);
      nir_intrinsic_set_align(load, align, 0);
      nir_ssa_dest_init(&load->instr, &load->dest,
                        intrin->dest.ssa.num_components,
                        intrin->dest.ssa.bit_size, NULL);
      nir_builder_instr_insert(b, &load->instr);

      nir_ssa_def *value = &load->dest.ssa;
      if (glsl_type_is_boolean(deref->type))
         value = nir_b2i32(b, value);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                               nir_src_for_ssa(&load->dest.ssa));
   } else {
      assert(intrin->intrinsic == nir_intrinsic_store_deref);

      assert(intrin->src[1].is_ssa);
      nir_ssa_def *value = intrin->src[1].ssa;
      if (glsl_type_is_boolean(deref->type))
         value = nir_i2b(b, value);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_scratch);
      store->num_components = intrin->num_components;
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
      nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
      nir_intrinsic_set_align(store, align, 0);
      nir_builder_instr_insert(b, &store->instr);
   }

   nir_instr_remove(&intrin->instr);
   nir_deref_instr_remove_if_unused(deref);
}
static void
emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
                nir_deref_var *deref, nir_deref *tail,
                nir_ssa_def **dest, nir_ssa_def *src)
{
   for (; tail->child; tail = tail->child) {
      if (tail->child->deref_type != nir_deref_type_array)
         continue;

      nir_deref_array *arr = nir_deref_as_array(tail->child);
      if (arr->deref_array_type != nir_deref_array_type_indirect)
         continue;

      int length = glsl_get_length(tail->type);

      emit_indirect_load_store(b, orig_instr, deref, tail, -arr->base_offset,
                               length - arr->base_offset, dest, src);
      return;
   }

   assert(tail && tail->child == NULL);

   /* We reached the end of the deref chain.  Emit the instruction */

   if (src == NULL) {
      /* This is a load instruction */
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = orig_instr->num_components;
      load->variables[0] =
         nir_deref_as_var(nir_copy_deref(load, &deref->deref));
      unsigned bit_size = orig_instr->dest.ssa.bit_size;
      nir_ssa_dest_init(&load->instr, &load->dest,
                        load->num_components, bit_size, NULL);
      nir_builder_instr_insert(b, &load->instr);
      *dest = &load->dest.ssa;
   } else {
      /* This is a store instruction */
      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
      store->num_components = orig_instr->num_components;
      nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(orig_instr));
      store->variables[0] =
         nir_deref_as_var(nir_copy_deref(store, &deref->deref));
      store->src[0] = nir_src_for_ssa(src);
      nir_builder_instr_insert(b, &store->instr);
   }
}
예제 #3
0
static bool
constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
{
   bool progress = false;

   if (instr->intrinsic == nir_intrinsic_discard_if &&
       nir_src_is_const(instr->src[0])) {
      if (nir_src_as_bool(instr->src[0])) {
         /* This method of getting a nir_shader * from a nir_instr is
          * admittedly gross, but given the rarity of hitting this case I think
          * it's preferable to plumbing an otherwise unused nir_shader *
          * parameter through four functions to get here.
          */
         nir_cf_node *cf_node = &instr->instr.block->cf_node;
         nir_function_impl *impl = nir_cf_node_get_function(cf_node);
         nir_shader *shader = impl->function->shader;

         nir_intrinsic_instr *discard =
            nir_intrinsic_instr_create(shader, nir_intrinsic_discard);
         nir_instr_insert_before(&instr->instr, &discard->instr);
         nir_instr_remove(&instr->instr);
         progress = true;
      } else {
         /* We're not discarding, just delete the instruction */
         nir_instr_remove(&instr->instr);
         progress = true;
      }
   }

   return progress;
}
예제 #4
0
파일: vtn_variables.c 프로젝트: menpo/mesa
static nir_ssa_def *
get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
                          struct vtn_type **type, unsigned *chain_idx)
{
   /* Push constants have no explicit binding */
   if (chain->var->mode == vtn_variable_mode_push_constant) {
      *chain_idx = 0;
      *type = chain->var->type;
      return NULL;
   }

   nir_ssa_def *array_index;
   if (glsl_type_is_array(chain->var->type->type)) {
      assert(chain->length > 0);
      array_index = vtn_access_link_as_ssa(b, chain->link[0], 1);
      *chain_idx = 1;
      *type = chain->var->type->array_element;
   } else {
      array_index = nir_imm_int(&b->nb, 0);
      *chain_idx = 0;
      *type = chain->var->type;
   }

   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(b->nb.shader,
                                 nir_intrinsic_vulkan_resource_index);
   instr->src[0] = nir_src_for_ssa(array_index);
   nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
   nir_intrinsic_set_binding(instr, chain->var->binding);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
   nir_builder_instr_insert(&b->nb, &instr->instr);

   return &instr->dest.ssa;
}
static void
build_color_shaders(struct nir_shader **out_vs,
                    struct nir_shader **out_fs,
                    uint32_t frag_output)
{
	nir_builder vs_b;
	nir_builder fs_b;

	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);

	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");

	const struct glsl_type *position_type = glsl_vec4_type();
	const struct glsl_type *color_type = glsl_vec4_type();

	nir_variable *vs_out_pos =
		nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
				    "gl_Position");
	vs_out_pos->data.location = VARYING_SLOT_POS;

	nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(in_color_load, 0);
	nir_intrinsic_set_range(in_color_load, 16);
	in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
	in_color_load->num_components = 4;
	nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 4, 32, "clear color");
	nir_builder_instr_insert(&fs_b, &in_color_load->instr);

	nir_variable *fs_out_color =
		nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
				    "f_color");
	fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;

	nir_store_var(&fs_b, fs_out_color, &in_color_load->dest.ssa, 0xf);

	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
	nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);

	const struct glsl_type *layer_type = glsl_int_type();
	nir_variable *vs_out_layer =
		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
				    "v_layer");
	vs_out_layer->data.location = VARYING_SLOT_LAYER;
	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
	nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
	nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0);

	nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
	nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);

	*out_vs = vs_b.shader;
	*out_fs = fs_b.shader;
}
예제 #6
0
파일: vtn_glsl450.c 프로젝트: etnaviv/mesa
static void
handle_glsl450_interpolation(struct vtn_builder *b, enum GLSLstd450 opcode,
                             const uint32_t *w, unsigned count)
{
   const struct glsl_type *dest_type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, dest_type);

   nir_intrinsic_op op;
   switch (opcode) {
   case GLSLstd450InterpolateAtCentroid:
      op = nir_intrinsic_interp_var_at_centroid;
      break;
   case GLSLstd450InterpolateAtSample:
      op = nir_intrinsic_interp_var_at_sample;
      break;
   case GLSLstd450InterpolateAtOffset:
      op = nir_intrinsic_interp_var_at_offset;
      break;
   default:
      unreachable("Invalid opcode");
   }

   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);

   nir_deref_var *deref = vtn_nir_deref(b, w[5]);
   intrin->variables[0] =
      nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));

   switch (opcode) {
   case GLSLstd450InterpolateAtCentroid:
      break;
   case GLSLstd450InterpolateAtSample:
   case GLSLstd450InterpolateAtOffset:
      intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
      break;
   default:
      unreachable("Invalid opcode");
   }

   intrin->num_components = glsl_get_vector_elements(dest_type);
   nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                     glsl_get_vector_elements(dest_type),
                     glsl_get_bit_size(dest_type), NULL);
   val->ssa->def = &intrin->dest.ssa;

   nir_builder_instr_insert(&b->nb, &intrin->instr);
}
static nir_shader *
build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
{
	nir_builder b;
	char name[64];
	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
	const struct glsl_type *vec4 = glsl_vec4_type();
	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);

	snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, name);

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *fs_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "fs_pos_in");
	fs_pos_in->data.location = VARYING_SLOT_POS;

	nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
						      vec4, "f_color");
	color_out->data.location = FRAG_RESULT_DATA0;

	nir_ssa_def *pos_in = nir_load_var(&b, fs_pos_in);
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 8);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);

	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, &src_offset->dest.ssa), 0x3);
	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
	                                    color, img_coord);

	nir_ssa_def *outval = nir_load_var(&b, color);
	nir_store_var(&b, color_out, outval, 0xf);
	return b.shader;
}
예제 #8
0
static void
vtn_build_subgroup_instr(struct vtn_builder *b,
                         nir_intrinsic_op nir_op,
                         struct vtn_ssa_value *dst,
                         struct vtn_ssa_value *src0,
                         nir_ssa_def *index,
                         unsigned const_idx0,
                         unsigned const_idx1)
{
   /* Some of the subgroup operations take an index.  SPIR-V allows this to be
    * any integer type.  To make things simpler for drivers, we only support
    * 32-bit indices.
    */
   if (index && index->bit_size != 32)
      index = nir_u2u32(&b->nb, index);

   vtn_assert(dst->type == src0->type);
   if (!glsl_type_is_vector_or_scalar(dst->type)) {
      for (unsigned i = 0; i < glsl_get_length(dst->type); i++) {
         vtn_build_subgroup_instr(b, nir_op, dst->elems[i],
                                  src0->elems[i], index,
                                  const_idx0, const_idx1);
      }
      return;
   }

   nir_intrinsic_instr *intrin =
      nir_intrinsic_instr_create(b->nb.shader, nir_op);
   nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                              dst->type, NULL);
   intrin->num_components = intrin->dest.ssa.num_components;

   intrin->src[0] = nir_src_for_ssa(src0->def);
   if (index)
      intrin->src[1] = nir_src_for_ssa(index);

   intrin->const_index[0] = const_idx0;
   intrin->const_index[1] = const_idx1;

   nir_builder_instr_insert(&b->nb, &intrin->instr);

   dst->def = &intrin->dest.ssa;
}
예제 #9
0
파일: vtn_variables.c 프로젝트: menpo/mesa
static void
_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                     nir_ssa_def *index, nir_ssa_def *offset,
                     struct vtn_ssa_value **inout, const struct glsl_type *type)
{
   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
   instr->num_components = glsl_get_vector_elements(type);

   int src = 0;
   if (!load) {
      nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1);
      instr->src[src++] = nir_src_for_ssa((*inout)->def);
   }

   /* We set the base and size for push constant load to the entire push
    * constant block for now.
    */
   if (op == nir_intrinsic_load_push_constant) {
      nir_intrinsic_set_base(instr, 0);
      nir_intrinsic_set_range(instr, 128);
   }

   if (index)
      instr->src[src++] = nir_src_for_ssa(index);

   instr->src[src++] = nir_src_for_ssa(offset);

   if (load) {
      nir_ssa_dest_init(&instr->instr, &instr->dest,
                        instr->num_components,
                        glsl_get_bit_size(glsl_get_base_type(type)), NULL);
      (*inout)->def = &instr->dest.ssa;
   }

   nir_builder_instr_insert(&b->nb, &instr->instr);

   if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
      (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
}
예제 #10
0
static void
lower_instr(nir_intrinsic_instr *instr,
            lower_atomic_state *state)
{
   nir_intrinsic_op op;
   switch (instr->intrinsic) {
   case nir_intrinsic_atomic_counter_read_var:
      op = nir_intrinsic_atomic_counter_read;
      break;

   case nir_intrinsic_atomic_counter_inc_var:
      op = nir_intrinsic_atomic_counter_inc;
      break;

   case nir_intrinsic_atomic_counter_dec_var:
      op = nir_intrinsic_atomic_counter_dec;
      break;

   default:
      return;
   }

   if (instr->variables[0]->var->data.mode != nir_var_uniform &&
       instr->variables[0]->var->data.mode != nir_var_shader_storage)
      return; /* atomics passed as function arguments can't be lowered */

   void *mem_ctx = ralloc_parent(instr);
   unsigned uniform_loc = instr->variables[0]->var->data.location;

   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
   new_instr->const_index[0] =
      state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;

   nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
   offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;

   nir_instr_insert_before(&instr->instr, &offset_const->instr);

   nir_ssa_def *offset_def = &offset_const->def;

   nir_deref *tail = &instr->variables[0]->deref;
   while (tail->child != NULL) {
      assert(tail->child->deref_type == nir_deref_type_array);
      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
      tail = tail->child;

      unsigned child_array_elements = tail->child != NULL ?
         glsl_get_aoa_size(tail->type) : 1;

      offset_const->value.u[0] += deref_array->base_offset *
         child_array_elements * ATOMIC_COUNTER_SIZE;

      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         nir_load_const_instr *atomic_counter_size =
               nir_load_const_instr_create(mem_ctx, 1);
         atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
         nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);

         nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
         mul->dest.write_mask = 0x1;
         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
         mul->src[1].src.is_ssa = true;
         mul->src[1].src.ssa = &atomic_counter_size->def;
         nir_instr_insert_before(&instr->instr, &mul->instr);

         nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
         nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
         add->dest.write_mask = 0x1;
         add->src[0].src.is_ssa = true;
         add->src[0].src.ssa = &mul->dest.dest.ssa;
         add->src[1].src.is_ssa = true;
         add->src[1].src.ssa = offset_def;
         nir_instr_insert_before(&instr->instr, &add->instr);

         offset_def = &add->dest.dest.ssa;
      }
   }

   new_instr->src[0].is_ssa = true;
   new_instr->src[0].ssa = offset_def;

   if (instr->dest.is_ssa) {
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
                        instr->dest.ssa.num_components, NULL);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_src_for_ssa(&new_instr->dest.ssa));
   } else {
      nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
   }

   nir_instr_insert_before(&instr->instr, &new_instr->instr);
   nir_instr_remove(&instr->instr);
}
예제 #11
0
static nir_shader *
create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
                       const nir_shader_compiler_options *options,
                       const struct brw_tcs_prog_key *key)
{
   nir_builder b;
   nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_TESS_CTRL,
                                  options);
   nir_shader *nir = b.shader;
   nir_variable *var;
   nir_intrinsic_instr *load;
   nir_intrinsic_instr *store;
   nir_ssa_def *zero = nir_imm_int(&b, 0);
   nir_ssa_def *invoc_id =
      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);

   nir->info->inputs_read = key->outputs_written;
   nir->info->outputs_written = key->outputs_written;
   nir->info->tcs.vertices_out = key->input_vertices;
   nir->info->name = ralloc_strdup(nir, "passthrough");
   nir->num_uniforms = 8 * sizeof(uint32_t);

   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0");
   var->data.location = 0;
   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1");
   var->data.location = 1;

   /* Write the patch URB header. */
   for (int i = 0; i <= 1; i++) {
      load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, i * 4 * sizeof(uint32_t));
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir, nir_intrinsic_store_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, VARYING_SLOT_TESS_LEVEL_INNER - i);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);
   }

   /* Copy inputs to outputs. */
   uint64_t varyings = key->outputs_written;

   while (varyings != 0) {
      const int varying = ffsll(varyings) - 1;

      load = nir_intrinsic_instr_create(nir,
                                        nir_intrinsic_load_per_vertex_input);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(invoc_id);
      load->src[1] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, varying);
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir,
                                         nir_intrinsic_store_per_vertex_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(invoc_id);
      store->src[2] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, varying);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);

      varyings &= ~BITFIELD64_BIT(varying);
   }

   nir_validate_shader(nir);

   nir = brw_preprocess_nir(compiler, nir);

   return nir;
}
예제 #12
0
void
vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
                    const uint32_t *w, unsigned count)
{
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);

   val->ssa = vtn_create_ssa_value(b, val->type->type);

   switch (opcode) {
   case SpvOpGroupNonUniformElect: {
      vtn_fail_if(val->type->type != glsl_bool_type(),
                  "OpGroupNonUniformElect must return a Bool");
      nir_intrinsic_instr *elect =
         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_elect);
      nir_ssa_dest_init_for_type(&elect->instr, &elect->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &elect->instr);
      val->ssa->def = &elect->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBallot: {
      vtn_fail_if(val->type->type != glsl_vector_type(GLSL_TYPE_UINT, 4),
                  "OpGroupNonUniformBallot must return a uvec4");
      nir_intrinsic_instr *ballot =
         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_ballot);
      ballot->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
      nir_ssa_dest_init(&ballot->instr, &ballot->dest, 4, 32, NULL);
      ballot->num_components = 4;
      nir_builder_instr_insert(&b->nb, &ballot->instr);
      val->ssa->def = &ballot->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformInverseBallot: {
      /* This one is just a BallotBitfieldExtract with subgroup invocation.
       * We could add a NIR intrinsic but it's easier to just lower it on the
       * spot.
       */
      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader,
                                    nir_intrinsic_ballot_bitfield_extract);

      intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
      intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));

      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBallotBitExtract:
   case SpvOpGroupNonUniformBallotBitCount:
   case SpvOpGroupNonUniformBallotFindLSB:
   case SpvOpGroupNonUniformBallotFindMSB: {
      nir_ssa_def *src0, *src1 = NULL;
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformBallotBitExtract:
         op = nir_intrinsic_ballot_bitfield_extract;
         src0 = vtn_ssa_value(b, w[4])->def;
         src1 = vtn_ssa_value(b, w[5])->def;
         break;
      case SpvOpGroupNonUniformBallotBitCount:
         switch ((SpvGroupOperation)w[4]) {
         case SpvGroupOperationReduce:
            op = nir_intrinsic_ballot_bit_count_reduce;
            break;
         case SpvGroupOperationInclusiveScan:
            op = nir_intrinsic_ballot_bit_count_inclusive;
            break;
         case SpvGroupOperationExclusiveScan:
            op = nir_intrinsic_ballot_bit_count_exclusive;
            break;
         default:
            unreachable("Invalid group operation");
         }
         src0 = vtn_ssa_value(b, w[5])->def;
         break;
      case SpvOpGroupNonUniformBallotFindLSB:
         op = nir_intrinsic_ballot_find_lsb;
         src0 = vtn_ssa_value(b, w[4])->def;
         break;
      case SpvOpGroupNonUniformBallotFindMSB:
         op = nir_intrinsic_ballot_find_msb;
         src0 = vtn_ssa_value(b, w[4])->def;
         break;
      default:
         unreachable("Unhandled opcode");
      }

      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader, op);

      intrin->src[0] = nir_src_for_ssa(src0);
      if (src1)
         intrin->src[1] = nir_src_for_ssa(src1);

      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBroadcastFirst:
      vtn_build_subgroup_instr(b, nir_intrinsic_read_first_invocation,
                               val->ssa, vtn_ssa_value(b, w[4]), NULL, 0, 0);
      break;

   case SpvOpGroupNonUniformBroadcast:
      vtn_build_subgroup_instr(b, nir_intrinsic_read_invocation,
                               val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;

   case SpvOpGroupNonUniformAll:
   case SpvOpGroupNonUniformAny:
   case SpvOpGroupNonUniformAllEqual: {
      vtn_fail_if(val->type->type != glsl_bool_type(),
                  "OpGroupNonUniform(All|Any|AllEqual) must return a bool");
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformAll:
         op = nir_intrinsic_vote_all;
         break;
      case SpvOpGroupNonUniformAny:
         op = nir_intrinsic_vote_any;
         break;
      case SpvOpGroupNonUniformAllEqual: {
         switch (glsl_get_base_type(val->type->type)) {
         case GLSL_TYPE_FLOAT:
         case GLSL_TYPE_DOUBLE:
            op = nir_intrinsic_vote_feq;
            break;
         case GLSL_TYPE_UINT:
         case GLSL_TYPE_INT:
         case GLSL_TYPE_UINT64:
         case GLSL_TYPE_INT64:
         case GLSL_TYPE_BOOL:
            op = nir_intrinsic_vote_ieq;
            break;
         default:
            unreachable("Unhandled type");
         }
         break;
      }
      default:
         unreachable("Unhandled opcode");
      }

      nir_ssa_def *src0 = vtn_ssa_value(b, w[4])->def;

      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader, op);
      intrin->num_components = src0->num_components;
      intrin->src[0] = nir_src_for_ssa(src0);
      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformShuffle:
   case SpvOpGroupNonUniformShuffleXor:
   case SpvOpGroupNonUniformShuffleUp:
   case SpvOpGroupNonUniformShuffleDown: {
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformShuffle:
         op = nir_intrinsic_shuffle;
         break;
      case SpvOpGroupNonUniformShuffleXor:
         op = nir_intrinsic_shuffle_xor;
         break;
      case SpvOpGroupNonUniformShuffleUp:
         op = nir_intrinsic_shuffle_up;
         break;
      case SpvOpGroupNonUniformShuffleDown:
         op = nir_intrinsic_shuffle_down;
         break;
      default:
         unreachable("Invalid opcode");
      }
      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;
   }

   case SpvOpGroupNonUniformQuadBroadcast:
      vtn_build_subgroup_instr(b, nir_intrinsic_quad_broadcast,
                               val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;

   case SpvOpGroupNonUniformQuadSwap: {
      unsigned direction = vtn_constant_uint(b, w[5]);
      nir_intrinsic_op op;
      switch (direction) {
      case 0:
         op = nir_intrinsic_quad_swap_horizontal;
         break;
      case 1:
         op = nir_intrinsic_quad_swap_vertical;
         break;
      case 2:
         op = nir_intrinsic_quad_swap_diagonal;
         break;
      default:
         vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap");
      }
      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
                               NULL, 0, 0);
      break;
   }

   case SpvOpGroupNonUniformIAdd:
   case SpvOpGroupNonUniformFAdd:
   case SpvOpGroupNonUniformIMul:
   case SpvOpGroupNonUniformFMul:
   case SpvOpGroupNonUniformSMin:
   case SpvOpGroupNonUniformUMin:
   case SpvOpGroupNonUniformFMin:
   case SpvOpGroupNonUniformSMax:
   case SpvOpGroupNonUniformUMax:
   case SpvOpGroupNonUniformFMax:
   case SpvOpGroupNonUniformBitwiseAnd:
   case SpvOpGroupNonUniformBitwiseOr:
   case SpvOpGroupNonUniformBitwiseXor:
   case SpvOpGroupNonUniformLogicalAnd:
   case SpvOpGroupNonUniformLogicalOr:
   case SpvOpGroupNonUniformLogicalXor: {
      nir_op reduction_op;
      switch (opcode) {
      case SpvOpGroupNonUniformIAdd:
         reduction_op = nir_op_iadd;
         break;
      case SpvOpGroupNonUniformFAdd:
         reduction_op = nir_op_fadd;
         break;
      case SpvOpGroupNonUniformIMul:
         reduction_op = nir_op_imul;
         break;
      case SpvOpGroupNonUniformFMul:
         reduction_op = nir_op_fmul;
         break;
      case SpvOpGroupNonUniformSMin:
         reduction_op = nir_op_imin;
         break;
      case SpvOpGroupNonUniformUMin:
         reduction_op = nir_op_umin;
         break;
      case SpvOpGroupNonUniformFMin:
         reduction_op = nir_op_fmin;
         break;
      case SpvOpGroupNonUniformSMax:
         reduction_op = nir_op_imax;
         break;
      case SpvOpGroupNonUniformUMax:
         reduction_op = nir_op_umax;
         break;
      case SpvOpGroupNonUniformFMax:
         reduction_op = nir_op_fmax;
         break;
      case SpvOpGroupNonUniformBitwiseAnd:
      case SpvOpGroupNonUniformLogicalAnd:
         reduction_op = nir_op_iand;
         break;
      case SpvOpGroupNonUniformBitwiseOr:
      case SpvOpGroupNonUniformLogicalOr:
         reduction_op = nir_op_ior;
         break;
      case SpvOpGroupNonUniformBitwiseXor:
      case SpvOpGroupNonUniformLogicalXor:
         reduction_op = nir_op_ixor;
         break;
      default:
         unreachable("Invalid reduction operation");
      }

      nir_intrinsic_op op;
      unsigned cluster_size = 0;
      switch ((SpvGroupOperation)w[4]) {
      case SpvGroupOperationReduce:
         op = nir_intrinsic_reduce;
         break;
      case SpvGroupOperationInclusiveScan:
         op = nir_intrinsic_inclusive_scan;
         break;
      case SpvGroupOperationExclusiveScan:
         op = nir_intrinsic_exclusive_scan;
         break;
      case SpvGroupOperationClusteredReduce:
         op = nir_intrinsic_reduce;
         assert(count == 7);
         cluster_size = vtn_constant_uint(b, w[6]);
         break;
      default:
         unreachable("Invalid group operation");
      }

      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[5]),
                               NULL, reduction_op, cluster_size);
      break;
   }

   default:
      unreachable("Invalid SPIR-V opcode");
   }
}
예제 #13
0
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev)
{
    nir_builder b;
    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
                                           false,
                                           false,
                                           GLSL_TYPE_FLOAT);
    const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
                                       false,
                                       false,
                                       GLSL_TYPE_FLOAT);
    nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
    b.shader->info->name = ralloc_strdup(b.shader, "meta_itob_cs");
    b.shader->info->cs.local_size[0] = 16;
    b.shader->info->cs.local_size[1] = 16;
    b.shader->info->cs.local_size[2] = 1;
    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
                              sampler_type, "s_tex");
    input_img->data.descriptor_set = 0;
    input_img->data.binding = 0;

    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
                               img_type, "out_img");
    output_img->data.descriptor_set = 0;
    output_img->data.binding = 1;

    nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
    nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
    nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                            b.shader->info->cs.local_size[0],
                                            b.shader->info->cs.local_size[1],
                                            b.shader->info->cs.local_size[2], 0);

    nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);



    nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
    offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
    offset->num_components = 2;
    nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
    nir_builder_instr_insert(&b, &offset->instr);

    nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
    stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
    stride->num_components = 1;
    nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
    nir_builder_instr_insert(&b, &stride->instr);

    nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);

    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
    tex->op = nir_texop_txf;
    tex->src[0].src_type = nir_tex_src_coord;
    tex->src[0].src = nir_src_for_ssa(img_coord);
    tex->src[1].src_type = nir_tex_src_lod;
    tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
    tex->dest_type = nir_type_float;
    tex->is_array = false;
    tex->coord_components = 2;
    tex->texture = nir_deref_var_create(tex, input_img);
    tex->sampler = NULL;

    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
    nir_builder_instr_insert(&b, &tex->instr);

    nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
    nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

    nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
    tmp = nir_iadd(&b, tmp, pos_x);

    nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

    nir_ssa_def *outval = &tex->dest.ssa;
    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
    store->src[0] = nir_src_for_ssa(coord);
    store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
    store->src[2] = nir_src_for_ssa(outval);
    store->variables[0] = nir_deref_var_create(store, output_img);

    nir_builder_instr_insert(&b, &store->instr);
    return b.shader;
}
예제 #14
0
파일: vtn_variables.c 프로젝트: menpo/mesa
static void
_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
                      nir_deref *tail, struct vtn_ssa_value *inout)
{
   /* The deref tail may contain a deref to select a component of a vector (in
    * other words, it might not be an actual tail) so we have to save it away
    * here since we overwrite it later.
    */
   nir_deref *old_child = tail->child;

   if (glsl_type_is_vector_or_scalar(tail->type)) {
      /* Terminate the deref chain in case there is one more link to pick
       * off a component of the vector.
       */
      tail->child = NULL;

      nir_intrinsic_op op = load ? nir_intrinsic_load_var :
                                   nir_intrinsic_store_var;

      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
      intrin->variables[0] =
         nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
      intrin->num_components = glsl_get_vector_elements(tail->type);

      if (load) {
         nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                           intrin->num_components,
                           glsl_get_bit_size(glsl_get_base_type(tail->type)),
                           NULL);
         inout->def = &intrin->dest.ssa;
      } else {
         nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1);
         intrin->src[0] = nir_src_for_ssa(inout->def);
      }

      nir_builder_instr_insert(&b->nb, &intrin->instr);
   } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
              glsl_type_is_matrix(tail->type)) {
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_array *deref_arr = nir_deref_array_create(b);
      deref_arr->deref_array_type = nir_deref_array_type_direct;
      deref_arr->deref.type = glsl_get_array_element(tail->type);
      tail->child = &deref_arr->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_arr->base_offset = i;
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   } else {
      assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
      tail->child = &deref_struct->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_struct->index = i;
         deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   }

   tail->child = old_child;
}
예제 #15
0
/* This function recursively walks the given deref chain and replaces the
 * given copy instruction with an equivalent sequence load/store
 * operations.
 *
 * @copy_instr    The copy instruction to replace; new instructions will be
 *                inserted before this one
 *
 * @dest_head     The head of the destination variable deref chain
 *
 * @src_head      The head of the source variable deref chain
 *
 * @dest_tail     The current tail of the destination variable deref chain;
 *                this is used for recursion and external callers of this
 *                function should call it with tail == head
 *
 * @src_tail      The current tail of the source variable deref chain;
 *                this is used for recursion and external callers of this
 *                function should call it with tail == head
 *
 * @state         The current variable lowering state
 */
static void
emit_copy_load_store(nir_intrinsic_instr *copy_instr,
                     nir_deref_var *dest_head, nir_deref_var *src_head,
                     nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx)
{
    /* Find the next pair of wildcards */
    nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
    nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);

    if (src_arr_parent || dest_arr_parent) {
        /* Wildcards had better come in matched pairs */
        assert(dest_arr_parent && dest_arr_parent);

        nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child);
        nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child);

        unsigned length = glsl_get_length(src_arr_parent->type);
        /* The wildcards should represent the same number of elements */
        assert(length == glsl_get_length(dest_arr_parent->type));
        assert(length > 0);

        /* Walk over all of the elements that this wildcard refers to and
         * call emit_copy_load_store on each one of them */
        src_arr->deref_array_type = nir_deref_array_type_direct;
        dest_arr->deref_array_type = nir_deref_array_type_direct;
        for (unsigned i = 0; i < length; i++) {
            src_arr->base_offset = i;
            dest_arr->base_offset = i;
            emit_copy_load_store(copy_instr, dest_head, src_head,
                                 &dest_arr->deref, &src_arr->deref, mem_ctx);
        }
        src_arr->deref_array_type = nir_deref_array_type_wildcard;
        dest_arr->deref_array_type = nir_deref_array_type_wildcard;
    } else {
        /* In this case, we have no wildcards anymore, so all we have to do
         * is just emit the load and store operations. */
        src_tail = nir_deref_tail(src_tail);
        dest_tail = nir_deref_tail(dest_tail);

        assert(src_tail->type == dest_tail->type);

        unsigned num_components = glsl_get_vector_elements(src_tail->type);

        nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
        load->num_components = num_components;
        load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
        nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);

        nir_instr_insert_before(&copy_instr->instr, &load->instr);

        nir_intrinsic_instr *store =
            nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
        store->num_components = num_components;
        store->const_index[0] = (1 << num_components) - 1;
        store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));

        store->src[0].is_ssa = true;
        store->src[0].ssa = &load->dest.ssa;

        nir_instr_insert_before(&copy_instr->instr, &store->instr);
    }
}
예제 #16
0
static bool
nir_lower_io_block(nir_block *block, void *void_state)
{
   struct lower_io_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var: {
         nir_variable_mode mode = intrin->variables[0]->var->data.mode;
         if (mode != nir_var_shader_in && mode != nir_var_uniform)
            continue;

         bool has_indirect = deref_has_indirect(intrin->variables[0]);

         /* Figure out the opcode */
         nir_intrinsic_op load_op;
         switch (mode) {
         case nir_var_shader_in:
            load_op = has_indirect ? nir_intrinsic_load_input_indirect :
                                     nir_intrinsic_load_input;
            break;
         case nir_var_uniform:
            load_op = has_indirect ? nir_intrinsic_load_uniform_indirect :
                                     nir_intrinsic_load_uniform;
            break;
         default:
            unreachable("Unknown variable mode");
         }

         nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
                                                                load_op);
         load->num_components = intrin->num_components;

         nir_src indirect;
         unsigned offset = get_io_offset(intrin->variables[0],
                                         &intrin->instr, &indirect, state);
         offset += intrin->variables[0]->var->data.driver_location;

         load->const_index[0] = offset;

         if (has_indirect)
            load->src[0] = indirect;

         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&load->instr, &load->dest,
                              intrin->num_components, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&load->dest.ssa),
                                     state->mem_ctx);
         } else {
            nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
         }

         nir_instr_insert_before(&intrin->instr, &load->instr);
         nir_instr_remove(&intrin->instr);
         break;
      }

      case nir_intrinsic_store_var: {
         if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
            continue;

         bool has_indirect = deref_has_indirect(intrin->variables[0]);

         nir_intrinsic_op store_op;
         if (has_indirect) {
            store_op = nir_intrinsic_store_output_indirect;
         } else {
            store_op = nir_intrinsic_store_output;
         }

         nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
                                                                 store_op);
         store->num_components = intrin->num_components;

         nir_src indirect;
         unsigned offset = get_io_offset(intrin->variables[0],
                                         &intrin->instr, &indirect, state);
         offset += intrin->variables[0]->var->data.driver_location;

         store->const_index[0] = offset;

         nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);

         if (has_indirect)
            store->src[1] = indirect;

         nir_instr_insert_before(&intrin->instr, &store->instr);
         nir_instr_remove(&intrin->instr);
         break;
      }

      default:
         break;
      }
   }

   return true;
}
예제 #17
0
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
예제 #18
0
/* Recursively constructs deref chains to split a copy instruction into
 * multiple (if needed) copy instructions with full-length deref chains.
 * External callers of this function should pass the tail and head of the
 * deref chains found as the source and destination of the copy instruction
 * into this function.
 *
 * \param  old_copy  The copy instruction we are splitting
 * \param  dest_head The head of the destination deref chain we are building
 * \param  src_head  The head of the source deref chain we are building
 * \param  dest_tail The tail of the destination deref chain we are building
 * \param  src_tail  The tail of the source deref chain we are building
 * \param  state     The current split_var_copies_state object
 */
static void
split_var_copy_instr(nir_intrinsic_instr *old_copy,
                     nir_deref *dest_head, nir_deref *src_head,
                     nir_deref *dest_tail, nir_deref *src_tail,
                     struct split_var_copies_state *state)
{
   assert(src_tail->type == dest_tail->type);

   /* Make sure these really are the tails of the deref chains */
   assert(dest_tail->child == NULL);
   assert(src_tail->child == NULL);

   switch (glsl_get_base_type(src_tail->type)) {
   case GLSL_TYPE_ARRAY: {
      /* Make a wildcard dereference */
      nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
      deref->deref.type = glsl_get_array_element(src_tail->type);
      deref->deref_array_type = nir_deref_array_type_wildcard;

      /* Set the tail of both as the newly created wildcard deref.  It is
       * safe to use the same wildcard in both places because a) we will be
       * copying it before we put it in an actual instruction and b)
       * everything that will potentially add another link in the deref
       * chain will also add the same thing to both chains.
       */
      src_tail->child = &deref->deref;
      dest_tail->child = &deref->deref;

      split_var_copy_instr(old_copy, dest_head, src_head,
                           dest_tail->child, src_tail->child, state);

      /* Set it back to the way we found it */
      src_tail->child = NULL;
      dest_tail->child = NULL;
      break;
   }

   case GLSL_TYPE_STRUCT:
      /* This is the only part that actually does any interesting
       * splitting.  For array types, we just use wildcards and resolve
       * them later.  For structure types, we need to emit one copy
       * instruction for every structure element.  Because we may have
       * structs inside structs, we just recurse and let the next level
       * take care of any additional structures.
       */
      for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) {
         nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i);
         deref->deref.type = glsl_get_struct_field(src_tail->type, i);

         /* Set the tail of both as the newly created structure deref.  It
          * is safe to use the same wildcard in both places because a) we
          * will be copying it before we put it in an actual instruction
          * and b) everything that will potentially add another link in the
          * deref chain will also add the same thing to both chains.
          */
         src_tail->child = &deref->deref;
         dest_tail->child = &deref->deref;

         split_var_copy_instr(old_copy, dest_head, src_head,
                              dest_tail->child, src_tail->child, state);
      }
      /* Set it back to the way we found it */
      src_tail->child = NULL;
      dest_tail->child = NULL;
      break;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (glsl_type_is_matrix(src_tail->type)) {
         nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
         deref->deref.type = glsl_get_column_type(src_tail->type);
         deref->deref_array_type = nir_deref_array_type_wildcard;

         /* Set the tail of both as the newly created wildcard deref.  It
          * is safe to use the same wildcard in both places because a) we
          * will be copying it before we put it in an actual instruction
          * and b) everything that will potentially add another link in the
          * deref chain will also add the same thing to both chains.
          */
         src_tail->child = &deref->deref;
         dest_tail->child = &deref->deref;

         split_var_copy_instr(old_copy, dest_head, src_head,
                              dest_tail->child, src_tail->child, state);

         /* Set it back to the way we found it */
         src_tail->child = NULL;
         dest_tail->child = NULL;
      } else {
         /* At this point, we have fully built our deref chains and can
          * actually add the new copy instruction.
          */
         nir_intrinsic_instr *new_copy =
            nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var);

         /* We need to make copies because a) this deref chain actually
          * belongs to the copy instruction and b) the deref chains may
          * have some of the same links due to the way we constructed them
          */
         nir_deref *src = nir_copy_deref(new_copy, src_head);
         nir_deref *dest = nir_copy_deref(new_copy, dest_head);

         new_copy->variables[0] = nir_deref_as_var(dest);
         new_copy->variables[1] = nir_deref_as_var(src);

         /* Emit the copy instruction after the old instruction.  We'll
          * remove the old one later.
          */
         nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
         state->progress = true;
      }
      break;

   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
   case GLSL_TYPE_ATOMIC_UINT:
   case GLSL_TYPE_INTERFACE:
   default:
      unreachable("Cannot copy these types");
   }
}
예제 #19
0
static void
lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
{
   nir_intrinsic_op op;
   switch (instr->intrinsic) {
   case nir_intrinsic_atomic_counter_read_var:
      op = nir_intrinsic_atomic_counter_read;
      break;

   case nir_intrinsic_atomic_counter_inc_var:
      op = nir_intrinsic_atomic_counter_inc;
      break;

   case nir_intrinsic_atomic_counter_dec_var:
      op = nir_intrinsic_atomic_counter_dec;
      break;

   default:
      return;
   }

   if (instr->variables[0]->var->data.mode != nir_var_uniform)
      return; /* atomics passed as function arguments can't be lowered */

   void *mem_ctx = ralloc_parent(instr);

   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
   new_instr->const_index[0] =
      (int) instr->variables[0]->var->data.atomic.buffer_index;

   nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
   offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;

   nir_instr_insert_before(&instr->instr, &offset_const->instr);

   nir_ssa_def *offset_def = &offset_const->def;

   if (instr->variables[0]->deref.child != NULL) {
      assert(instr->variables[0]->deref.child->deref_type ==
             nir_deref_type_array);
      nir_deref_array *deref_array =
         nir_deref_as_array(instr->variables[0]->deref.child);
      assert(deref_array->deref.child == NULL);

      offset_const->value.u[0] +=
         deref_array->base_offset * ATOMIC_COUNTER_SIZE;

      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         nir_load_const_instr *atomic_counter_size =
               nir_load_const_instr_create(mem_ctx, 1);
         atomic_counter_size->value.u[0] = ATOMIC_COUNTER_SIZE;
         nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);

         nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
         mul->dest.write_mask = 0x1;
         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
         mul->src[1].src.is_ssa = true;
         mul->src[1].src.ssa = &atomic_counter_size->def;
         nir_instr_insert_before(&instr->instr, &mul->instr);

         nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
         nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
         add->dest.write_mask = 0x1;
         add->src[0].src.is_ssa = true;
         add->src[0].src.ssa = &mul->dest.dest.ssa;
         add->src[1].src.is_ssa = true;
         add->src[1].src.ssa = &offset_const->def;
         nir_instr_insert_before(&instr->instr, &add->instr);

         offset_def = &add->dest.dest.ssa;
      }
   }

   new_instr->src[0].is_ssa = true;
   new_instr->src[0].ssa = offset_def;;

   if (instr->dest.is_ssa) {
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
                        instr->dest.ssa.num_components, NULL);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_src_for_ssa(&new_instr->dest.ssa),
                               mem_ctx);
   } else {
      nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
   }

   nir_instr_insert_before(&instr->instr, &new_instr->instr);
   nir_instr_remove(&instr->instr);
}
예제 #20
0
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int samples)
{
	nir_builder b;
	char name[64];
	nir_if *outer_if = NULL;
	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float");
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, name);
	b.shader->info->cs.local_size[0] = 16;
	b.shader->info->cs.local_size[1] = 16;
	b.shader->info->cs.local_size[2] = 1;

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	dst_offset->num_components = 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	/* do a txf_ms on each sample */
	nir_ssa_def *tmp;

	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
	tex->op = nir_texop_txf_ms;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(img_coord);
	tex->src[1].src_type = nir_tex_src_ms_index;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 2;
	tex->texture = nir_deref_var_create(tex, input_img);
	tex->sampler = NULL;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	tmp = &tex->dest.ssa;
	nir_variable *color =
		nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

	if (!is_integer && samples > 1) {
		nir_tex_instr *tex_all_same = nir_tex_instr_create(b.shader, 1);
		tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
		tex_all_same->op = nir_texop_samples_identical;
		tex_all_same->src[0].src_type = nir_tex_src_coord;
		tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
		tex_all_same->dest_type = nir_type_float;
		tex_all_same->is_array = false;
		tex_all_same->coord_components = 2;
		tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
		tex_all_same->sampler = NULL;

		nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
		nir_builder_instr_insert(&b, &tex_all_same->instr);

		nir_ssa_def *all_same = nir_ine(&b, &tex_all_same->dest.ssa, nir_imm_int(&b, 0));
		nir_if *if_stmt = nir_if_create(b.shader);
		if_stmt->condition = nir_src_for_ssa(all_same);
		nir_cf_node_insert(b.cursor, &if_stmt->cf_node);

		b.cursor = nir_after_cf_list(&if_stmt->then_list);
		for (int i = 1; i < samples; i++) {
			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 2);
			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
			tex_add->op = nir_texop_txf_ms;
			tex_add->src[0].src_type = nir_tex_src_coord;
			tex_add->src[0].src = nir_src_for_ssa(img_coord);
			tex_add->src[1].src_type = nir_tex_src_ms_index;
			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
			tex_add->dest_type = nir_type_float;
			tex_add->is_array = false;
			tex_add->coord_components = 2;
			tex_add->texture = nir_deref_var_create(tex_add, input_img);
			tex_add->sampler = NULL;

			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
			nir_builder_instr_insert(&b, &tex_add->instr);

			tmp = nir_fadd(&b, tmp, &tex_add->dest.ssa);
		}

		tmp = nir_fdiv(&b, tmp, nir_imm_float(&b, samples));
		nir_store_var(&b, color, tmp, 0xf);
		b.cursor = nir_after_cf_list(&if_stmt->else_list);
		outer_if = if_stmt;
	}
	nir_store_var(&b, color, &tex->dest.ssa, 0xf);

	if (outer_if)
		b.cursor = nir_after_cf_node(&outer_if->cf_node);

	nir_ssa_def *newv = nir_load_var(&b, color);
	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
	store->src[0] = nir_src_for_ssa(coord);
	store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[2] = nir_src_for_ssa(newv);
	store->variables[0] = nir_deref_var_create(store, output_img);
	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
예제 #21
0
static void *vc4_get_yuv_vs(struct pipe_context *pctx)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;

   if (vc4->yuv_linear_blit_vs)
           return vc4->yuv_linear_blit_vs;

   const struct nir_shader_compiler_options *options =
           pscreen->get_compiler_options(pscreen,
                                         PIPE_SHADER_IR_NIR,
                                         PIPE_SHADER_VERTEX);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
   b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs");

   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");

   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                               vec4, "gl_Position");
   pos_out->data.location = VARYING_SLOT_POS;

   nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);

   struct pipe_shader_state shader_tmpl = {
           .type = PIPE_SHADER_IR_NIR,
           .ir.nir = b.shader,
   };

   vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);

   return vc4->yuv_linear_blit_vs;
}

static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;
   struct pipe_shader_state **cached_shader;
   const char *name;

   if (cpp == 1) {
           cached_shader = &vc4->yuv_linear_blit_fs_8bit;
           name = "linear_blit_8bit_fs";
   } else {
           cached_shader = &vc4->yuv_linear_blit_fs_16bit;
           name = "linear_blit_16bit_fs";
   }

   if (*cached_shader)
           return *cached_shader;

   const struct nir_shader_compiler_options *options =
           pscreen->get_compiler_options(pscreen,
                                         PIPE_SHADER_IR_NIR,
                                         PIPE_SHADER_FRAGMENT);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
   b.shader->info.name = ralloc_strdup(b.shader, name);

   const struct glsl_type *vec4 = glsl_vec4_type();
   const struct glsl_type *glsl_int = glsl_int_type();

   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                 vec4, "f_color");
   color_out->data.location = FRAG_RESULT_COLOR;

   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");
   pos_in->data.location = VARYING_SLOT_POS;
   nir_ssa_def *pos = nir_load_var(&b, pos_in);

   nir_ssa_def *one = nir_imm_int(&b, 1);
   nir_ssa_def *two = nir_imm_int(&b, 2);

   nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
   nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

   nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform,
                                                 glsl_int, "stride");
   nir_ssa_def *stride = nir_load_var(&b, stride_in);

   nir_ssa_def *x_offset;
   nir_ssa_def *y_offset;
   if (cpp == 1) {
           nir_ssa_def *intra_utile_x_offset =
                   nir_ishl(&b, nir_iand(&b, x, one), two);
           nir_ssa_def *inter_utile_x_offset =
                   nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one);

           x_offset = nir_iadd(&b,
                               intra_utile_x_offset,
                               inter_utile_x_offset);
           y_offset = nir_imul(&b,
                               nir_iadd(&b,
                                        nir_ishl(&b, y, one),
                                        nir_ushr(&b, nir_iand(&b, x, two), one)),
                               stride);
   } else {
           x_offset = nir_ishl(&b, x, two);
           y_offset = nir_imul(&b, y, stride);
   }

   nir_intrinsic_instr *load =
           nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   load->src[0] = nir_src_for_ssa(one);
   load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset));
   nir_builder_instr_insert(&b, &load->instr);

   nir_store_var(&b, color_out,
                 nir_unpack_unorm_4x8(&b, &load->dest.ssa),
                 0xf);

   struct pipe_shader_state shader_tmpl = {
           .type = PIPE_SHADER_IR_NIR,
           .ir.nir = b.shader,
   };

   *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);

   return *cached_shader;
}

static bool
vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_resource *src = vc4_resource(info->src.resource);
        struct vc4_resource *dst = vc4_resource(info->dst.resource);
        bool ok;

        if (src->tiled)
                return false;
        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
            src->base.format != PIPE_FORMAT_R8G8_UNORM)
                return false;

        /* YUV blits always turn raster-order to tiled */
        assert(dst->base.format == src->base.format);
        assert(dst->tiled);

        /* Always 1:1 and at the origin */
        assert(info->src.box.x == 0 && info->dst.box.x == 0);
        assert(info->src.box.y == 0 && info->dst.box.y == 0);
        assert(info->src.box.width == info->dst.box.width);
        assert(info->src.box.height == info->dst.box.height);

        if ((src->slices[info->src.level].offset & 3) ||
            (src->slices[info->src.level].stride & 3)) {
                perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n",
                           src->slices[info->src.level].offset,
                           src->slices[info->src.level].stride);
                goto fallback;
        }

        vc4_blitter_save(vc4);

        /* Create a renderable surface mapping the T-tiled shadow buffer.
         */
        struct pipe_surface dst_tmpl;
        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
                                         info->dst.level, info->dst.box.z);
        dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM;
        struct pipe_surface *dst_surf =
                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
        if (!dst_surf) {
                fprintf(stderr, "Failed to create YUV dst surface\n");
                util_blitter_unset_running_flag(vc4->blitter);
                return false;
        }
        dst_surf->width /= 2;
        if (dst->cpp == 1)
                dst_surf->height /= 2;

        /* Set the constant buffer. */
        uint32_t stride = src->slices[info->src.level].stride;
        struct pipe_constant_buffer cb_uniforms = {
                .user_buffer = &stride,
                .buffer_size = sizeof(stride),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb_uniforms);
        struct pipe_constant_buffer cb_src = {
                .buffer = info->src.resource,
                .buffer_offset = src->slices[info->src.level].offset,
                .buffer_size = (src->bo->size -
                                src->slices[info->src.level].offset),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_src);

        /* Unbind the textures, to make sure we don't try to recurse into the
         * shadow blit.
         */
        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);

        util_blitter_custom_shader(vc4->blitter, dst_surf,
                                   vc4_get_yuv_vs(pctx),
                                   vc4_get_yuv_fs(pctx, src->cpp));

        util_blitter_restore_textures(vc4->blitter);
        util_blitter_restore_constant_buffer_state(vc4->blitter);
        /* Restore cb1 (util_blitter doesn't handle this one). */
        struct pipe_constant_buffer cb_disabled = { 0 };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_disabled);

        pipe_surface_reference(&dst_surf, NULL);

        return true;

fallback:
        /* Do an immediate SW fallback, since the render blit path
         * would just recurse.
         */
        ok = util_try_blit_via_copy_region(pctx, info);
        assert(ok); (void)ok;

        return true;
}

static bool
vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(ctx);

        if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
                fprintf(stderr, "blit unsupported %s -> %s\n",
                    util_format_short_name(info->src.resource->format),
                    util_format_short_name(info->dst.resource->format));
                return false;
        }

        /* Enable the scissor, so we get a minimal set of tiles rendered. */
        if (!info->scissor_enable) {
                info->scissor_enable = true;
                info->scissor.minx = info->dst.box.x;
                info->scissor.miny = info->dst.box.y;
                info->scissor.maxx = info->dst.box.x + info->dst.box.width;
                info->scissor.maxy = info->dst.box.y + info->dst.box.height;
        }

        vc4_blitter_save(vc4);
        util_blitter_blit(vc4->blitter, info);

        return true;
}

/* Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
void
vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
        struct pipe_blit_info info = *blit_info;

        if (vc4_yuv_blit(pctx, blit_info))
                return;

        if (vc4_tile_blit(pctx, blit_info))
                return;

        if (info.mask & PIPE_MASK_S) {
                if (util_try_blit_via_copy_region(pctx, &info))
                        return;

                info.mask &= ~PIPE_MASK_S;
                fprintf(stderr, "cannot blit stencil, skipping\n");
        }

        if (vc4_render_blit(pctx, &info))
                return;

        fprintf(stderr, "Unsupported blit\n");
}