static void
lower_load_store(nir_builder *b,
                 nir_intrinsic_instr *intrin,
                 glsl_type_size_align_func size_align)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   nir_ssa_def *offset =
      nir_iadd_imm(b, nir_build_deref_offset(b, deref, size_align),
                   var->data.location);

   unsigned align, UNUSED size;
   size_align(deref->type, &size, &align);

   if (intrin->intrinsic == nir_intrinsic_load_deref) {
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_scratch);
      load->num_components = intrin->num_components;
      load->src[0] = nir_src_for_ssa(offset);
      nir_intrinsic_set_align(load, align, 0);
      nir_ssa_dest_init(&load->instr, &load->dest,
                        intrin->dest.ssa.num_components,
                        intrin->dest.ssa.bit_size, NULL);
      nir_builder_instr_insert(b, &load->instr);

      /* Booleans are stored in scratch as 32-bit integers; convert the
       * loaded integer back to a boolean.
       */
      nir_ssa_def *value = &load->dest.ssa;
      if (glsl_type_is_boolean(deref->type))
         value = nir_i2b(b, value);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
   } else {
      assert(intrin->intrinsic == nir_intrinsic_store_deref);

      assert(intrin->src[1].is_ssa);
      nir_ssa_def *value = intrin->src[1].ssa;

      /* Booleans have no in-memory representation; store them as 32-bit
       * integers.
       */
      if (glsl_type_is_boolean(deref->type))
         value = nir_b2i32(b, value);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_scratch);
      store->num_components = intrin->num_components;
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
      nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
      nir_intrinsic_set_align(store, align, 0);
      nir_builder_instr_insert(b, &store->instr);
   }

   nir_instr_remove(&intrin->instr);
   nir_deref_instr_remove_if_unused(deref);
}
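/* A minimal sketch of how lower_load_store() could be driven over a shader.
 * The wrapper name and the nir_var_function_temp mode check are assumptions
 * for illustration; the surrounding pass in Mesa may be structured
 * differently.
 */
static bool
lower_scratch_impl(nir_function_impl *impl, glsl_type_size_align_func size_align)
{
   bool progress = false;
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_deref &&
             intrin->intrinsic != nir_intrinsic_store_deref)
            continue;

         /* Only local (scratch) variables are lowered here. */
         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
         if (deref->mode != nir_var_function_temp)
            continue;

         lower_load_store(&b, intrin, size_align);
         progress = true;
      }
   }

   return progress;
}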
static void
emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
                nir_deref_var *deref, nir_deref *tail,
                nir_ssa_def **dest, nir_ssa_def *src)
{
   for (; tail->child; tail = tail->child) {
      if (tail->child->deref_type != nir_deref_type_array)
         continue;

      nir_deref_array *arr = nir_deref_as_array(tail->child);
      if (arr->deref_array_type != nir_deref_array_type_indirect)
         continue;

      int length = glsl_get_length(tail->type);

      emit_indirect_load_store(b, orig_instr, deref, tail, -arr->base_offset,
                               length - arr->base_offset, dest, src);
      return;
   }

   assert(tail && tail->child == NULL);

   /* We reached the end of the deref chain.  Emit the instruction */

   if (src == NULL) {
      /* This is a load instruction */
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = orig_instr->num_components;
      load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &deref->deref));
      unsigned bit_size = orig_instr->dest.ssa.bit_size;
      nir_ssa_dest_init(&load->instr, &load->dest,
                        load->num_components, bit_size, NULL);
      nir_builder_instr_insert(b, &load->instr);
      *dest = &load->dest.ssa;
   } else {
      /* This is a store instruction */
      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
      store->num_components = orig_instr->num_components;
      nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(orig_instr));
      store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &deref->deref));
      store->src[0] = nir_src_for_ssa(src);
      nir_builder_instr_insert(b, &store->instr);
   }
}
static bool
constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
{
   bool progress = false;

   if (instr->intrinsic == nir_intrinsic_discard_if &&
       nir_src_is_const(instr->src[0])) {
      if (nir_src_as_bool(instr->src[0])) {
         /* This method of getting a nir_shader * from a nir_instr is
          * admittedly gross, but given the rarity of hitting this case I think
          * it's preferable to plumbing an otherwise unused nir_shader *
          * parameter through four functions to get here.
          */
         nir_cf_node *cf_node = &instr->instr.block->cf_node;
         nir_function_impl *impl = nir_cf_node_get_function(cf_node);
         nir_shader *shader = impl->function->shader;

         nir_intrinsic_instr *discard =
            nir_intrinsic_instr_create(shader, nir_intrinsic_discard);
         nir_instr_insert_before(&instr->instr, &discard->instr);
         nir_instr_remove(&instr->instr);
         progress = true;
      } else {
         /* We're not discarding, just delete the instruction */
         nir_instr_remove(&instr->instr);
         progress = true;
      }
   }

   return progress;
}
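/* Sketch of a block-level driver for the folding helper above; the wrapper
 * name is hypothetical, but this mirrors the usual shape of a NIR
 * optimization pass.
 */
static bool
constant_fold_block(nir_block *block)
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type == nir_instr_type_intrinsic)
         progress |= constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr));
   }

   return progress;
}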
static nir_ssa_def *
get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
                          struct vtn_type **type, unsigned *chain_idx)
{
   /* Push constants have no explicit binding */
   if (chain->var->mode == vtn_variable_mode_push_constant) {
      *chain_idx = 0;
      *type = chain->var->type;
      return NULL;
   }

   nir_ssa_def *array_index;
   if (glsl_type_is_array(chain->var->type->type)) {
      assert(chain->length > 0);
      array_index = vtn_access_link_as_ssa(b, chain->link[0], 1);
      *chain_idx = 1;
      *type = chain->var->type->array_element;
   } else {
      array_index = nir_imm_int(&b->nb, 0);
      *chain_idx = 0;
      *type = chain->var->type;
   }

   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(b->nb.shader,
                                 nir_intrinsic_vulkan_resource_index);
   instr->src[0] = nir_src_for_ssa(array_index);
   nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
   nir_intrinsic_set_binding(instr, chain->var->binding);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
   nir_builder_instr_insert(&b->nb, &instr->instr);

   return &instr->dest.ssa;
}
static void
build_color_shaders(struct nir_shader **out_vs,
                    struct nir_shader **out_fs,
                    uint32_t frag_output)
{
   nir_builder vs_b;
   nir_builder fs_b;

   nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
   nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);

   vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
   fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");

   const struct glsl_type *position_type = glsl_vec4_type();
   const struct glsl_type *color_type = glsl_vec4_type();

   nir_variable *vs_out_pos =
      nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
                          "gl_Position");
   vs_out_pos->data.location = VARYING_SLOT_POS;

   nir_intrinsic_instr *in_color_load =
      nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
   nir_intrinsic_set_base(in_color_load, 0);
   nir_intrinsic_set_range(in_color_load, 16);
   in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
   in_color_load->num_components = 4;
   nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 4, 32,
                     "clear color");
   nir_builder_instr_insert(&fs_b, &in_color_load->instr);

   nir_variable *fs_out_color =
      nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
                          "f_color");
   fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;

   nir_store_var(&fs_b, fs_out_color, &in_color_load->dest.ssa, 0xf);

   nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
   nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);

   const struct glsl_type *layer_type = glsl_int_type();
   nir_variable *vs_out_layer =
      nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
                          "v_layer");
   vs_out_layer->data.location = VARYING_SLOT_LAYER;
   vs_out_layer->data.interpolation = INTERP_MODE_FLAT;

   nir_ssa_def *inst_id =
      nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
   nir_ssa_def *base_instance =
      nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0);

   nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
   nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);

   *out_vs = vs_b.shader;
   *out_fs = fs_b.shader;
}
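/* For reference, a sketch of the push-constant range the clear pipeline
 * layout would need so the 16-byte load above is in range; the variable
 * name is hypothetical.
 */
static const VkPushConstantRange meta_clear_color_range = {
   .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
   .offset = 0,
   .size = 16, /* matches nir_intrinsic_set_range(in_color_load, 16) */
};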
static void
handle_glsl450_interpolation(struct vtn_builder *b, enum GLSLstd450 opcode,
                             const uint32_t *w, unsigned count)
{
   const struct glsl_type *dest_type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, dest_type);

   nir_intrinsic_op op;
   switch (opcode) {
   case GLSLstd450InterpolateAtCentroid:
      op = nir_intrinsic_interp_var_at_centroid;
      break;
   case GLSLstd450InterpolateAtSample:
      op = nir_intrinsic_interp_var_at_sample;
      break;
   case GLSLstd450InterpolateAtOffset:
      op = nir_intrinsic_interp_var_at_offset;
      break;
   default:
      unreachable("Invalid opcode");
   }

   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);

   nir_deref_var *deref = vtn_nir_deref(b, w[5]);
   intrin->variables[0] =
      nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));

   switch (opcode) {
   case GLSLstd450InterpolateAtCentroid:
      break;
   case GLSLstd450InterpolateAtSample:
   case GLSLstd450InterpolateAtOffset:
      intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
      break;
   default:
      unreachable("Invalid opcode");
   }

   intrin->num_components = glsl_get_vector_elements(dest_type);
   nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                     glsl_get_vector_elements(dest_type),
                     glsl_get_bit_size(dest_type), NULL);
   val->ssa->def = &intrin->dest.ssa;

   nir_builder_instr_insert(&b->nb, &intrin->instr);
}
static nir_shader *
build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
{
   nir_builder b;
   char name[64];
   const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   const struct glsl_type *vec4 = glsl_vec4_type();
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);

   snprintf(name, 64, "meta_resolve_fs-%d-%s", samples,
            is_integer ? "int" : "float");

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
   b.shader->info.name = ralloc_strdup(b.shader, name);

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
                                                 sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *fs_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                                 vec2, "fs_pos_in");
   fs_pos_in->data.location = VARYING_SLOT_POS;

   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                 vec4, "f_color");
   color_out->data.location = FRAG_RESULT_DATA0;

   nir_ssa_def *pos_in = nir_load_var(&b, fs_pos_in);

   nir_intrinsic_instr *src_offset =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   nir_intrinsic_set_base(src_offset, 0);
   nir_intrinsic_set_range(src_offset, 8);
   src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
   src_offset->num_components = 2;
   nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
   nir_builder_instr_insert(&b, &src_offset->instr);

   nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);

   nir_ssa_def *img_coord =
      nir_channels(&b, nir_iadd(&b, pos_int, &src_offset->dest.ssa), 0x3);

   nir_variable *color =
      nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
                                       color, img_coord);

   nir_ssa_def *outval = nir_load_var(&b, color);
   nir_store_var(&b, color_out, outval, 0xf);

   return b.shader;
}
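/* Sketch of the matching host-side update: pushing the two-int src_offset
 * this shader consumes.  The helper name and the command-buffer and
 * pipeline-layout handles are assumptions; radv's meta code drives its own
 * internal entry points rather than the public Vulkan call shown here.
 */
static void
push_resolve_src_offset(VkCommandBuffer cmd_buf, VkPipelineLayout layout,
                        int32_t src_x, int32_t src_y)
{
   const int32_t push[2] = { src_x, src_y };

   /* 8 bytes, matching nir_intrinsic_set_range(src_offset, 8) above. */
   vkCmdPushConstants(cmd_buf, layout, VK_SHADER_STAGE_FRAGMENT_BIT,
                      0, sizeof(push), push);
}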
static void
vtn_build_subgroup_instr(struct vtn_builder *b,
                         nir_intrinsic_op nir_op,
                         struct vtn_ssa_value *dst,
                         struct vtn_ssa_value *src0,
                         nir_ssa_def *index,
                         unsigned const_idx0,
                         unsigned const_idx1)
{
   /* Some of the subgroup operations take an index.  SPIR-V allows this to
    * be any integer type.  To make things simpler for drivers, we only
    * support 32-bit indices.
    */
   if (index && index->bit_size != 32)
      index = nir_u2u32(&b->nb, index);

   vtn_assert(dst->type == src0->type);
   if (!glsl_type_is_vector_or_scalar(dst->type)) {
      for (unsigned i = 0; i < glsl_get_length(dst->type); i++) {
         vtn_build_subgroup_instr(b, nir_op, dst->elems[i],
                                  src0->elems[i], index,
                                  const_idx0, const_idx1);
      }
      return;
   }

   nir_intrinsic_instr *intrin =
      nir_intrinsic_instr_create(b->nb.shader, nir_op);
   nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                              dst->type, NULL);
   intrin->num_components = intrin->dest.ssa.num_components;

   intrin->src[0] = nir_src_for_ssa(src0->def);
   if (index)
      intrin->src[1] = nir_src_for_ssa(index);

   intrin->const_index[0] = const_idx0;
   intrin->const_index[1] = const_idx1;

   nir_builder_instr_insert(&b->nb, &intrin->instr);

   dst->def = &intrin->dest.ssa;
}
static void
_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                     nir_ssa_def *index, nir_ssa_def *offset,
                     struct vtn_ssa_value **inout, const struct glsl_type *type)
{
   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
   instr->num_components = glsl_get_vector_elements(type);

   int src = 0;
   if (!load) {
      nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1);
      instr->src[src++] = nir_src_for_ssa((*inout)->def);
   }

   /* We set the base and size for push constant load to the entire push
    * constant block for now.
    */
   if (op == nir_intrinsic_load_push_constant) {
      nir_intrinsic_set_base(instr, 0);
      nir_intrinsic_set_range(instr, 128);
   }

   if (index)
      instr->src[src++] = nir_src_for_ssa(index);

   instr->src[src++] = nir_src_for_ssa(offset);

   if (load) {
      nir_ssa_dest_init(&instr->instr, &instr->dest,
                        instr->num_components,
                        glsl_get_bit_size(glsl_get_base_type(type)), NULL);
      (*inout)->def = &instr->dest.ssa;
   }

   nir_builder_instr_insert(&b->nb, &instr->instr);

   if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
      (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
}
static void
lower_instr(nir_intrinsic_instr *instr, lower_atomic_state *state)
{
   nir_intrinsic_op op;
   switch (instr->intrinsic) {
   case nir_intrinsic_atomic_counter_read_var:
      op = nir_intrinsic_atomic_counter_read;
      break;

   case nir_intrinsic_atomic_counter_inc_var:
      op = nir_intrinsic_atomic_counter_inc;
      break;

   case nir_intrinsic_atomic_counter_dec_var:
      op = nir_intrinsic_atomic_counter_dec;
      break;

   default:
      return;
   }

   if (instr->variables[0]->var->data.mode != nir_var_uniform &&
       instr->variables[0]->var->data.mode != nir_var_shader_storage)
      return; /* atomics passed as function arguments can't be lowered */

   void *mem_ctx = ralloc_parent(instr);
   unsigned uniform_loc = instr->variables[0]->var->data.location;

   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
   new_instr->const_index[0] =
      state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;

   nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
   offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;

   nir_instr_insert_before(&instr->instr, &offset_const->instr);

   nir_ssa_def *offset_def = &offset_const->def;

   nir_deref *tail = &instr->variables[0]->deref;
   while (tail->child != NULL) {
      assert(tail->child->deref_type == nir_deref_type_array);
      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
      tail = tail->child;

      unsigned child_array_elements = tail->child != NULL ?
         glsl_get_aoa_size(tail->type) : 1;

      offset_const->value.u[0] += deref_array->base_offset *
         child_array_elements * ATOMIC_COUNTER_SIZE;

      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         nir_load_const_instr *atomic_counter_size =
            nir_load_const_instr_create(mem_ctx, 1);
         atomic_counter_size->value.u[0] =
            child_array_elements * ATOMIC_COUNTER_SIZE;
         nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);

         nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
         mul->dest.write_mask = 0x1;
         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
         mul->src[1].src.is_ssa = true;
         mul->src[1].src.ssa = &atomic_counter_size->def;
         nir_instr_insert_before(&instr->instr, &mul->instr);

         nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
         nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
         add->dest.write_mask = 0x1;
         add->src[0].src.is_ssa = true;
         add->src[0].src.ssa = &mul->dest.dest.ssa;
         add->src[1].src.is_ssa = true;
         add->src[1].src.ssa = offset_def;
         nir_instr_insert_before(&instr->instr, &add->instr);

         offset_def = &add->dest.dest.ssa;
      }
   }

   new_instr->src[0].is_ssa = true;
   new_instr->src[0].ssa = offset_def;

   if (instr->dest.is_ssa) {
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
                        instr->dest.ssa.num_components, NULL);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_src_for_ssa(&new_instr->dest.ssa));
   } else {
      nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
   }

   nir_instr_insert_before(&instr->instr, &new_instr->instr);
   nir_instr_remove(&instr->instr);
}
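/* Worked example of the offset computation above (declaration hypothetical):
 *
 *    layout(binding = 0, offset = 4) uniform atomic_uint c[4][2];
 *
 * For a reference c[i][1], each loop iteration folds direct indices into
 * offset_const and emits imul/iadd chains for indirect ones, giving
 *
 *    offset = 4                              (declared counter offset)
 *           + i * (2 * ATOMIC_COUNTER_SIZE)  (indirect outer index)
 *           + 1 * ATOMIC_COUNTER_SIZE        (direct inner index)
 *
 * with ATOMIC_COUNTER_SIZE being 4 bytes.
 */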
static nir_shader *
create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
                       const nir_shader_compiler_options *options,
                       const struct brw_tcs_prog_key *key)
{
   nir_builder b;
   nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_TESS_CTRL, options);
   nir_shader *nir = b.shader;
   nir_variable *var;
   nir_intrinsic_instr *load;
   nir_intrinsic_instr *store;
   nir_ssa_def *zero = nir_imm_int(&b, 0);
   nir_ssa_def *invoc_id =
      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);

   nir->info->inputs_read = key->outputs_written;
   nir->info->outputs_written = key->outputs_written;
   nir->info->tcs.vertices_out = key->input_vertices;
   nir->info->name = ralloc_strdup(nir, "passthrough");
   nir->num_uniforms = 8 * sizeof(uint32_t);

   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0");
   var->data.location = 0;
   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1");
   var->data.location = 1;

   /* Write the patch URB header. */
   for (int i = 0; i <= 1; i++) {
      load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, i * 4 * sizeof(uint32_t));
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir, nir_intrinsic_store_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, VARYING_SLOT_TESS_LEVEL_INNER - i);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);
   }

   /* Copy inputs to outputs. */
   uint64_t varyings = key->outputs_written;

   while (varyings != 0) {
      const int varying = ffsll(varyings) - 1;

      load = nir_intrinsic_instr_create(nir,
                                        nir_intrinsic_load_per_vertex_input);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(invoc_id);
      load->src[1] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, varying);
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir,
                                         nir_intrinsic_store_per_vertex_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(invoc_id);
      store->src[2] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, varying);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);

      varyings &= ~BITFIELD64_BIT(varying);
   }

   nir_validate_shader(nir);

   nir = brw_preprocess_nir(compiler, nir);

   return nir;
}
void
vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
                    const uint32_t *w, unsigned count)
{
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);

   val->ssa = vtn_create_ssa_value(b, val->type->type);

   switch (opcode) {
   case SpvOpGroupNonUniformElect: {
      vtn_fail_if(val->type->type != glsl_bool_type(),
                  "OpGroupNonUniformElect must return a Bool");
      nir_intrinsic_instr *elect =
         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_elect);
      nir_ssa_dest_init_for_type(&elect->instr, &elect->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &elect->instr);
      val->ssa->def = &elect->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBallot: {
      vtn_fail_if(val->type->type != glsl_vector_type(GLSL_TYPE_UINT, 4),
                  "OpGroupNonUniformBallot must return a uvec4");
      nir_intrinsic_instr *ballot =
         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_ballot);
      ballot->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
      nir_ssa_dest_init(&ballot->instr, &ballot->dest, 4, 32, NULL);
      ballot->num_components = 4;
      nir_builder_instr_insert(&b->nb, &ballot->instr);
      val->ssa->def = &ballot->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformInverseBallot: {
      /* This one is just a BallotBitfieldExtract with subgroup invocation.
       * We could add a NIR intrinsic but it's easier to just lower it on the
       * spot.
       */
      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader,
                                    nir_intrinsic_ballot_bitfield_extract);

      intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
      intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));

      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBallotBitExtract:
   case SpvOpGroupNonUniformBallotBitCount:
   case SpvOpGroupNonUniformBallotFindLSB:
   case SpvOpGroupNonUniformBallotFindMSB: {
      nir_ssa_def *src0, *src1 = NULL;
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformBallotBitExtract:
         op = nir_intrinsic_ballot_bitfield_extract;
         src0 = vtn_ssa_value(b, w[4])->def;
         src1 = vtn_ssa_value(b, w[5])->def;
         break;
      case SpvOpGroupNonUniformBallotBitCount:
         switch ((SpvGroupOperation)w[4]) {
         case SpvGroupOperationReduce:
            op = nir_intrinsic_ballot_bit_count_reduce;
            break;
         case SpvGroupOperationInclusiveScan:
            op = nir_intrinsic_ballot_bit_count_inclusive;
            break;
         case SpvGroupOperationExclusiveScan:
            op = nir_intrinsic_ballot_bit_count_exclusive;
            break;
         default:
            unreachable("Invalid group operation");
         }
         src0 = vtn_ssa_value(b, w[5])->def;
         break;
      case SpvOpGroupNonUniformBallotFindLSB:
         op = nir_intrinsic_ballot_find_lsb;
         src0 = vtn_ssa_value(b, w[4])->def;
         break;
      case SpvOpGroupNonUniformBallotFindMSB:
         op = nir_intrinsic_ballot_find_msb;
         src0 = vtn_ssa_value(b, w[4])->def;
         break;
      default:
         unreachable("Unhandled opcode");
      }

      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader, op);
      intrin->src[0] = nir_src_for_ssa(src0);
      if (src1)
         intrin->src[1] = nir_src_for_ssa(src1);

      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBroadcastFirst:
      vtn_build_subgroup_instr(b, nir_intrinsic_read_first_invocation,
                               val->ssa, vtn_ssa_value(b, w[4]), NULL, 0, 0);
      break;

   case SpvOpGroupNonUniformBroadcast:
      vtn_build_subgroup_instr(b, nir_intrinsic_read_invocation,
                               val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;

   case SpvOpGroupNonUniformAll:
   case SpvOpGroupNonUniformAny:
   case SpvOpGroupNonUniformAllEqual: {
      vtn_fail_if(val->type->type != glsl_bool_type(),
                  "OpGroupNonUniform(All|Any|AllEqual) must return a bool");
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformAll:
         op = nir_intrinsic_vote_all;
         break;
      case SpvOpGroupNonUniformAny:
         op = nir_intrinsic_vote_any;
         break;
      case SpvOpGroupNonUniformAllEqual: {
         switch (glsl_get_base_type(val->type->type)) {
         case GLSL_TYPE_FLOAT:
         case GLSL_TYPE_DOUBLE:
            op = nir_intrinsic_vote_feq;
            break;
         case GLSL_TYPE_UINT:
         case GLSL_TYPE_INT:
         case GLSL_TYPE_UINT64:
         case GLSL_TYPE_INT64:
         case GLSL_TYPE_BOOL:
            op = nir_intrinsic_vote_ieq;
            break;
         default:
            unreachable("Unhandled type");
         }
         break;
      }
      default:
         unreachable("Unhandled opcode");
      }

      nir_ssa_def *src0 = vtn_ssa_value(b, w[4])->def;

      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader, op);
      intrin->num_components = src0->num_components;
      intrin->src[0] = nir_src_for_ssa(src0);
      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformShuffle:
   case SpvOpGroupNonUniformShuffleXor:
   case SpvOpGroupNonUniformShuffleUp:
   case SpvOpGroupNonUniformShuffleDown: {
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformShuffle:
         op = nir_intrinsic_shuffle;
         break;
      case SpvOpGroupNonUniformShuffleXor:
         op = nir_intrinsic_shuffle_xor;
         break;
      case SpvOpGroupNonUniformShuffleUp:
         op = nir_intrinsic_shuffle_up;
         break;
      case SpvOpGroupNonUniformShuffleDown:
         op = nir_intrinsic_shuffle_down;
         break;
      default:
         unreachable("Invalid opcode");
      }

      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;
   }

   case SpvOpGroupNonUniformQuadBroadcast:
      vtn_build_subgroup_instr(b, nir_intrinsic_quad_broadcast,
                               val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;

   case SpvOpGroupNonUniformQuadSwap: {
      unsigned direction = vtn_constant_uint(b, w[5]);
      nir_intrinsic_op op;
      switch (direction) {
      case 0:
         op = nir_intrinsic_quad_swap_horizontal;
         break;
      case 1:
         op = nir_intrinsic_quad_swap_vertical;
         break;
      case 2:
         op = nir_intrinsic_quad_swap_diagonal;
         break;
      default:
         vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap");
      }
      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
                               NULL, 0, 0);
      break;
   }

   case SpvOpGroupNonUniformIAdd:
   case SpvOpGroupNonUniformFAdd:
   case SpvOpGroupNonUniformIMul:
   case SpvOpGroupNonUniformFMul:
   case SpvOpGroupNonUniformSMin:
   case SpvOpGroupNonUniformUMin:
   case SpvOpGroupNonUniformFMin:
   case SpvOpGroupNonUniformSMax:
   case SpvOpGroupNonUniformUMax:
   case SpvOpGroupNonUniformFMax:
   case SpvOpGroupNonUniformBitwiseAnd:
   case SpvOpGroupNonUniformBitwiseOr:
   case SpvOpGroupNonUniformBitwiseXor:
   case SpvOpGroupNonUniformLogicalAnd:
   case SpvOpGroupNonUniformLogicalOr:
   case SpvOpGroupNonUniformLogicalXor: {
      nir_op reduction_op;
      switch (opcode) {
      case SpvOpGroupNonUniformIAdd:
         reduction_op = nir_op_iadd;
         break;
      case SpvOpGroupNonUniformFAdd:
         reduction_op = nir_op_fadd;
         break;
      case SpvOpGroupNonUniformIMul:
         reduction_op = nir_op_imul;
         break;
      case SpvOpGroupNonUniformFMul:
         reduction_op = nir_op_fmul;
         break;
      case SpvOpGroupNonUniformSMin:
         reduction_op = nir_op_imin;
         break;
      case SpvOpGroupNonUniformUMin:
         reduction_op = nir_op_umin;
         break;
      case SpvOpGroupNonUniformFMin:
         reduction_op = nir_op_fmin;
         break;
      case SpvOpGroupNonUniformSMax:
         reduction_op = nir_op_imax;
         break;
      case SpvOpGroupNonUniformUMax:
         reduction_op = nir_op_umax;
         break;
      case SpvOpGroupNonUniformFMax:
         reduction_op = nir_op_fmax;
         break;
      case SpvOpGroupNonUniformBitwiseAnd:
      case SpvOpGroupNonUniformLogicalAnd:
         reduction_op = nir_op_iand;
         break;
      case SpvOpGroupNonUniformBitwiseOr:
      case SpvOpGroupNonUniformLogicalOr:
         reduction_op = nir_op_ior;
         break;
      case SpvOpGroupNonUniformBitwiseXor:
      case SpvOpGroupNonUniformLogicalXor:
         reduction_op = nir_op_ixor;
         break;
      default:
         unreachable("Invalid reduction operation");
      }

      nir_intrinsic_op op;
      unsigned cluster_size = 0;
      switch ((SpvGroupOperation)w[4]) {
      case SpvGroupOperationReduce:
         op = nir_intrinsic_reduce;
         break;
      case SpvGroupOperationInclusiveScan:
         op = nir_intrinsic_inclusive_scan;
         break;
      case SpvGroupOperationExclusiveScan:
         op = nir_intrinsic_exclusive_scan;
         break;
      case SpvGroupOperationClusteredReduce:
         op = nir_intrinsic_reduce;
         assert(count == 7);
         cluster_size = vtn_constant_uint(b, w[6]);
         break;
      default:
         unreachable("Invalid group operation");
      }

      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[5]),
                               NULL, reduction_op, cluster_size);
      break;
   }

   default:
      unreachable("Invalid SPIR-V opcode");
   }
}
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev)
{
   nir_builder b;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
   b.shader->info->name = ralloc_strdup(b.shader, "meta_itob_cs");
   b.shader->info->cs.local_size[0] = 16;
   b.shader->info->cs.local_size[1] = 16;
   b.shader->info->cs.local_size[2] = 1;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
                                                 sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
                                                  img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *invoc_id =
      nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
   nir_ssa_def *wg_id =
      nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b,
                    b.shader->info->cs.local_size[0],
                    b.shader->info->cs.local_size[1],
                    b.shader->info->cs.local_size[2], 0);

   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size),
                                     invoc_id);

   nir_intrinsic_instr *offset =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
   offset->num_components = 2;
   nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
   nir_builder_instr_insert(&b, &offset->instr);

   nir_intrinsic_instr *stride =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
   stride->num_components = 1;
   nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
   nir_builder_instr_insert(&b, &stride->instr);

   nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->op = nir_texop_txf;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(img_coord);
   tex->src[1].src_type = nir_tex_src_lod;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   tex->dest_type = nir_type_float;
   tex->is_array = false;
   tex->coord_components = 2;
   tex->texture = nir_deref_var_create(tex, input_img);
   tex->sampler = NULL;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
   nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

   nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
   tmp = nir_iadd(&b, tmp, pos_x);

   nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

   nir_ssa_def *outval = &tex->dest.ssa;
   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
   store->src[0] = nir_src_for_ssa(coord);
   store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
   store->src[2] = nir_src_for_ssa(outval);
   store->variables[0] = nir_deref_var_create(store, output_img);

   nir_builder_instr_insert(&b, &store->instr);
   return b.shader;
}
static void
_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
                      nir_deref *tail, struct vtn_ssa_value *inout)
{
   /* The deref tail may contain a deref to select a component of a vector (in
    * other words, it might not be an actual tail) so we have to save it away
    * here since we overwrite it later.
    */
   nir_deref *old_child = tail->child;

   if (glsl_type_is_vector_or_scalar(tail->type)) {
      /* Terminate the deref chain in case there is one more link to pick
       * off a component of the vector.
       */
      tail->child = NULL;

      nir_intrinsic_op op = load ? nir_intrinsic_load_var :
                                   nir_intrinsic_store_var;

      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
      intrin->variables[0] =
         nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
      intrin->num_components = glsl_get_vector_elements(tail->type);

      if (load) {
         nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                           intrin->num_components,
                           glsl_get_bit_size(glsl_get_base_type(tail->type)),
                           NULL);
         inout->def = &intrin->dest.ssa;
      } else {
         nir_intrinsic_set_write_mask(intrin,
                                      (1 << intrin->num_components) - 1);
         intrin->src[0] = nir_src_for_ssa(inout->def);
      }

      nir_builder_instr_insert(&b->nb, &intrin->instr);
   } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
              glsl_type_is_matrix(tail->type)) {
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_array *deref_arr = nir_deref_array_create(b);
      deref_arr->deref_array_type = nir_deref_array_type_direct;
      deref_arr->deref.type = glsl_get_array_element(tail->type);
      tail->child = &deref_arr->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_arr->base_offset = i;
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   } else {
      assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
      tail->child = &deref_struct->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_struct->index = i;
         deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   }

   tail->child = old_child;
}
/* This function recursively walks the given deref chain and replaces the
 * given copy instruction with an equivalent sequence of load/store
 * operations.
 *
 * @copy_instr The copy instruction to replace; new instructions will be
 *             inserted before this one
 *
 * @dest_head  The head of the destination variable deref chain
 *
 * @src_head   The head of the source variable deref chain
 *
 * @dest_tail  The current tail of the destination variable deref chain;
 *             this is used for recursion and external callers of this
 *             function should call it with tail == head
 *
 * @src_tail   The current tail of the source variable deref chain;
 *             this is used for recursion and external callers of this
 *             function should call it with tail == head
 *
 * @state      The current variable lowering state
 */
static void
emit_copy_load_store(nir_intrinsic_instr *copy_instr,
                     nir_deref_var *dest_head, nir_deref_var *src_head,
                     nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx)
{
   /* Find the next pair of wildcards */
   nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
   nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);

   if (src_arr_parent || dest_arr_parent) {
      /* Wildcards had better come in matched pairs */
      assert(src_arr_parent && dest_arr_parent);

      nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child);
      nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child);

      unsigned length = glsl_get_length(src_arr_parent->type);
      /* The wildcards should represent the same number of elements */
      assert(length == glsl_get_length(dest_arr_parent->type));
      assert(length > 0);

      /* Walk over all of the elements that this wildcard refers to and
       * call emit_copy_load_store on each one of them */
      src_arr->deref_array_type = nir_deref_array_type_direct;
      dest_arr->deref_array_type = nir_deref_array_type_direct;
      for (unsigned i = 0; i < length; i++) {
         src_arr->base_offset = i;
         dest_arr->base_offset = i;
         emit_copy_load_store(copy_instr, dest_head, src_head,
                              &dest_arr->deref, &src_arr->deref, mem_ctx);
      }
      src_arr->deref_array_type = nir_deref_array_type_wildcard;
      dest_arr->deref_array_type = nir_deref_array_type_wildcard;
   } else {
      /* In this case, we have no wildcards anymore, so all we have to do
       * is just emit the load and store operations. */
      src_tail = nir_deref_tail(src_tail);
      dest_tail = nir_deref_tail(dest_tail);

      assert(src_tail->type == dest_tail->type);

      unsigned num_components = glsl_get_vector_elements(src_tail->type);

      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
      load->num_components = num_components;
      load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
      nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);

      nir_instr_insert_before(&copy_instr->instr, &load->instr);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
      store->num_components = num_components;
      store->const_index[0] = (1 << num_components) - 1;
      store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));

      store->src[0].is_ssa = true;
      store->src[0].ssa = &load->dest.ssa;

      nir_instr_insert_before(&copy_instr->instr, &store->instr);
   }
}
static bool
nir_lower_io_block(nir_block *block, void *void_state)
{
   struct lower_io_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var: {
         nir_variable_mode mode = intrin->variables[0]->var->data.mode;
         if (mode != nir_var_shader_in && mode != nir_var_uniform)
            continue;

         bool has_indirect = deref_has_indirect(intrin->variables[0]);

         /* Figure out the opcode */
         nir_intrinsic_op load_op;
         switch (mode) {
         case nir_var_shader_in:
            load_op = has_indirect ? nir_intrinsic_load_input_indirect :
                                     nir_intrinsic_load_input;
            break;
         case nir_var_uniform:
            load_op = has_indirect ? nir_intrinsic_load_uniform_indirect :
                                     nir_intrinsic_load_uniform;
            break;
         default:
            unreachable("Unknown variable mode");
         }

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(state->mem_ctx, load_op);
         load->num_components = intrin->num_components;

         nir_src indirect;
         unsigned offset = get_io_offset(intrin->variables[0],
                                         &intrin->instr, &indirect, state);
         offset += intrin->variables[0]->var->data.driver_location;

         load->const_index[0] = offset;

         if (has_indirect)
            load->src[0] = indirect;

         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&load->instr, &load->dest,
                              intrin->num_components, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&load->dest.ssa),
                                     state->mem_ctx);
         } else {
            nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
         }

         nir_instr_insert_before(&intrin->instr, &load->instr);
         nir_instr_remove(&intrin->instr);
         break;
      }

      case nir_intrinsic_store_var: {
         if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
            continue;

         bool has_indirect = deref_has_indirect(intrin->variables[0]);

         nir_intrinsic_op store_op;
         if (has_indirect) {
            store_op = nir_intrinsic_store_output_indirect;
         } else {
            store_op = nir_intrinsic_store_output;
         }

         nir_intrinsic_instr *store =
            nir_intrinsic_instr_create(state->mem_ctx, store_op);
         store->num_components = intrin->num_components;

         nir_src indirect;
         unsigned offset = get_io_offset(intrin->variables[0],
                                         &intrin->instr, &indirect, state);
         offset += intrin->variables[0]->var->data.driver_location;

         store->const_index[0] = offset;

         nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);

         if (has_indirect)
            store->src[1] = indirect;

         nir_instr_insert_before(&intrin->instr, &store->instr);
         nir_instr_remove(&intrin->instr);
         break;
      }

      default:
         break;
      }
   }

   return true;
}
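/* Sketch of a per-impl driver for the block callback above, using the
 * callback-style nir_foreach_block() iteration of this era of NIR; the
 * wrapper name is hypothetical and the state setup is elided.
 */
static void
nir_lower_io_impl(nir_function_impl *impl, struct lower_io_state *state)
{
   nir_foreach_block(impl, nir_lower_io_block, state);
}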
   b.shader->info->cs.local_size[1] = 1;
   b.shader->info->cs.local_size[2] = 1;

   nir_ssa_def *invoc_id =
      nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
   nir_ssa_def *wg_id =
      nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b,
                    b.shader->info->cs.local_size[0],
                    b.shader->info->cs.local_size[1],
                    b.shader->info->cs.local_size[2], 0);

   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size),
                                     invoc_id);

   nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
   offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

   nir_intrinsic_instr *dst_buf =
      nir_intrinsic_instr_create(b.shader,
                                 nir_intrinsic_vulkan_resource_index);
   dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
   nir_intrinsic_set_desc_set(dst_buf, 0);
   nir_intrinsic_set_binding(dst_buf, 0);
   nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
   nir_builder_instr_insert(&b, &dst_buf->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
   nir_builder_instr_insert(&b, &load->instr);

   nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa,
                                            (unsigned[]) { 0, 0, 0, 0},
                                            4, false);

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
/* Recursively constructs deref chains to split a copy instruction into
 * multiple (if needed) copy instructions with full-length deref chains.
 * External callers of this function should pass the tail and head of the
 * deref chains found as the source and destination of the copy instruction
 * into this function.
 *
 * \param old_copy  The copy instruction we are splitting
 * \param dest_head The head of the destination deref chain we are building
 * \param src_head  The head of the source deref chain we are building
 * \param dest_tail The tail of the destination deref chain we are building
 * \param src_tail  The tail of the source deref chain we are building
 * \param state     The current split_var_copies_state object
 */
static void
split_var_copy_instr(nir_intrinsic_instr *old_copy,
                     nir_deref *dest_head, nir_deref *src_head,
                     nir_deref *dest_tail, nir_deref *src_tail,
                     struct split_var_copies_state *state)
{
   assert(src_tail->type == dest_tail->type);

   /* Make sure these really are the tails of the deref chains */
   assert(dest_tail->child == NULL);
   assert(src_tail->child == NULL);

   switch (glsl_get_base_type(src_tail->type)) {
   case GLSL_TYPE_ARRAY: {
      /* Make a wildcard dereference */
      nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
      deref->deref.type = glsl_get_array_element(src_tail->type);
      deref->deref_array_type = nir_deref_array_type_wildcard;

      /* Set the tail of both as the newly created wildcard deref.  It is
       * safe to use the same wildcard in both places because a) we will be
       * copying it before we put it in an actual instruction and b)
       * everything that will potentially add another link in the deref
       * chain will also add the same thing to both chains.
       */
      src_tail->child = &deref->deref;
      dest_tail->child = &deref->deref;

      split_var_copy_instr(old_copy, dest_head, src_head,
                           dest_tail->child, src_tail->child, state);

      /* Set it back to the way we found it */
      src_tail->child = NULL;
      dest_tail->child = NULL;
      break;
   }

   case GLSL_TYPE_STRUCT:
      /* This is the only part that actually does any interesting
       * splitting.  For array types, we just use wildcards and resolve
       * them later.  For structure types, we need to emit one copy
       * instruction for every structure element.  Because we may have
       * structs inside structs, we just recurse and let the next level
       * take care of any additional structures.
       */
      for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) {
         nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i);
         deref->deref.type = glsl_get_struct_field(src_tail->type, i);

         /* Set the tail of both as the newly created structure deref.  It
          * is safe to use the same wildcard in both places because a) we
          * will be copying it before we put it in an actual instruction
          * and b) everything that will potentially add another link in the
          * deref chain will also add the same thing to both chains.
          */
         src_tail->child = &deref->deref;
         dest_tail->child = &deref->deref;

         split_var_copy_instr(old_copy, dest_head, src_head,
                              dest_tail->child, src_tail->child, state);
      }

      /* Set it back to the way we found it */
      src_tail->child = NULL;
      dest_tail->child = NULL;
      break;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (glsl_type_is_matrix(src_tail->type)) {
         nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
         deref->deref.type = glsl_get_column_type(src_tail->type);
         deref->deref_array_type = nir_deref_array_type_wildcard;

         /* Set the tail of both as the newly created wildcard deref.  It
          * is safe to use the same wildcard in both places because a) we
          * will be copying it before we put it in an actual instruction
          * and b) everything that will potentially add another link in the
          * deref chain will also add the same thing to both chains.
          */
         src_tail->child = &deref->deref;
         dest_tail->child = &deref->deref;

         split_var_copy_instr(old_copy, dest_head, src_head,
                              dest_tail->child, src_tail->child, state);

         /* Set it back to the way we found it */
         src_tail->child = NULL;
         dest_tail->child = NULL;
      } else {
         /* At this point, we have fully built our deref chains and can
          * actually add the new copy instruction.
          */
         nir_intrinsic_instr *new_copy =
            nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var);

         /* We need to make copies because a) this deref chain actually
          * belongs to the copy instruction and b) the deref chains may
          * have some of the same links due to the way we constructed them
          */
         nir_deref *src = nir_copy_deref(new_copy, src_head);
         nir_deref *dest = nir_copy_deref(new_copy, dest_head);

         new_copy->variables[0] = nir_deref_as_var(dest);
         new_copy->variables[1] = nir_deref_as_var(src);

         /* Emit the copy instruction after the old instruction.  We'll
          * remove the old one later.
          */
         nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
         state->progress = true;
      }
      break;

   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
   case GLSL_TYPE_ATOMIC_UINT:
   case GLSL_TYPE_INTERFACE:
   default:
      unreachable("Cannot copy these types");
   }
}
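/* Illustration (shader hypothetical): splitting a copy between two
 * variables of type
 *
 *    struct { vec4 a; float b[2]; }
 *
 * produces one copy_var per struct member, with array members left as
 * wildcard copies:
 *
 *    copy_var dst.a,    src.a
 *    copy_var dst.b[*], src.b[*]
 *
 * A later lowering such as emit_copy_load_store() above then expands each
 * wildcard copy into per-element load_var/store_var pairs.
 */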
static void
lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
{
   nir_intrinsic_op op;
   switch (instr->intrinsic) {
   case nir_intrinsic_atomic_counter_read_var:
      op = nir_intrinsic_atomic_counter_read;
      break;

   case nir_intrinsic_atomic_counter_inc_var:
      op = nir_intrinsic_atomic_counter_inc;
      break;

   case nir_intrinsic_atomic_counter_dec_var:
      op = nir_intrinsic_atomic_counter_dec;
      break;

   default:
      return;
   }

   if (instr->variables[0]->var->data.mode != nir_var_uniform)
      return; /* atomics passed as function arguments can't be lowered */

   void *mem_ctx = ralloc_parent(instr);

   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
   new_instr->const_index[0] =
      (int) instr->variables[0]->var->data.atomic.buffer_index;

   nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
   offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;

   nir_instr_insert_before(&instr->instr, &offset_const->instr);

   nir_ssa_def *offset_def = &offset_const->def;

   if (instr->variables[0]->deref.child != NULL) {
      assert(instr->variables[0]->deref.child->deref_type ==
             nir_deref_type_array);
      nir_deref_array *deref_array =
         nir_deref_as_array(instr->variables[0]->deref.child);
      assert(deref_array->deref.child == NULL);

      offset_const->value.u[0] +=
         deref_array->base_offset * ATOMIC_COUNTER_SIZE;

      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         nir_load_const_instr *atomic_counter_size =
            nir_load_const_instr_create(mem_ctx, 1);
         atomic_counter_size->value.u[0] = ATOMIC_COUNTER_SIZE;
         nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);

         nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
         mul->dest.write_mask = 0x1;
         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
         mul->src[1].src.is_ssa = true;
         mul->src[1].src.ssa = &atomic_counter_size->def;
         nir_instr_insert_before(&instr->instr, &mul->instr);

         nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
         nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
         add->dest.write_mask = 0x1;
         add->src[0].src.is_ssa = true;
         add->src[0].src.ssa = &mul->dest.dest.ssa;
         add->src[1].src.is_ssa = true;
         add->src[1].src.ssa = &offset_const->def;
         nir_instr_insert_before(&instr->instr, &add->instr);

         offset_def = &add->dest.dest.ssa;
      }
   }

   new_instr->src[0].is_ssa = true;
   new_instr->src[0].ssa = offset_def;

   if (instr->dest.is_ssa) {
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
                        instr->dest.ssa.num_components, NULL);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_src_for_ssa(&new_instr->dest.ssa),
                               mem_ctx);
   } else {
      nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
   }

   nir_instr_insert_before(&instr->instr, &new_instr->instr);
   nir_instr_remove(&instr->instr);
}
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int samples)
{
   nir_builder b;
   char name[64];
   nir_if *outer_if = NULL;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);

   snprintf(name, 64, "meta_resolve_cs-%d-%s", samples,
            is_integer ? "int" : "float");

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
   b.shader->info->name = ralloc_strdup(b.shader, name);
   b.shader->info->cs.local_size[0] = 16;
   b.shader->info->cs.local_size[1] = 16;
   b.shader->info->cs.local_size[2] = 1;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
                                                 sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
                                                  img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *invoc_id =
      nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
   nir_ssa_def *wg_id =
      nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b,
                    b.shader->info->cs.local_size[0],
                    b.shader->info->cs.local_size[1],
                    b.shader->info->cs.local_size[2], 0);

   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size),
                                     invoc_id);

   nir_intrinsic_instr *src_offset =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
   src_offset->num_components = 2;
   nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
   nir_builder_instr_insert(&b, &src_offset->instr);

   nir_intrinsic_instr *dst_offset =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
   dst_offset->num_components = 2;
   nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
   nir_builder_instr_insert(&b, &dst_offset->instr);

   nir_ssa_def *img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);

   /* do a txf_ms on each sample */
   nir_ssa_def *tmp;

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
   tex->op = nir_texop_txf_ms;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(img_coord);
   tex->src[1].src_type = nir_tex_src_ms_index;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   tex->dest_type = nir_type_float;
   tex->is_array = false;
   tex->coord_components = 2;
   tex->texture = nir_deref_var_create(tex, input_img);
   tex->sampler = NULL;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   tmp = &tex->dest.ssa;
   nir_variable *color =
      nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   if (!is_integer && samples > 1) {
      nir_tex_instr *tex_all_same = nir_tex_instr_create(b.shader, 1);
      tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
      tex_all_same->op = nir_texop_samples_identical;
      tex_all_same->src[0].src_type = nir_tex_src_coord;
      tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
      tex_all_same->dest_type = nir_type_float;
      tex_all_same->is_array = false;
      tex_all_same->coord_components = 2;
      tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
      tex_all_same->sampler = NULL;

      nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
      nir_builder_instr_insert(&b, &tex_all_same->instr);

      nir_ssa_def *all_same =
         nir_ine(&b, &tex_all_same->dest.ssa, nir_imm_int(&b, 0));
      nir_if *if_stmt = nir_if_create(b.shader);
      if_stmt->condition = nir_src_for_ssa(all_same);
      nir_cf_node_insert(b.cursor, &if_stmt->cf_node);

      b.cursor = nir_after_cf_list(&if_stmt->then_list);
      for (int i = 1; i < samples; i++) {
         nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 2);
         tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
         tex_add->op = nir_texop_txf_ms;
         tex_add->src[0].src_type = nir_tex_src_coord;
         tex_add->src[0].src = nir_src_for_ssa(img_coord);
         tex_add->src[1].src_type = nir_tex_src_ms_index;
         tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
         tex_add->dest_type = nir_type_float;
         tex_add->is_array = false;
         tex_add->coord_components = 2;
         tex_add->texture = nir_deref_var_create(tex_add, input_img);
         tex_add->sampler = NULL;

         nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
         nir_builder_instr_insert(&b, &tex_add->instr);

         tmp = nir_fadd(&b, tmp, &tex_add->dest.ssa);
      }

      tmp = nir_fdiv(&b, tmp, nir_imm_float(&b, samples));
      nir_store_var(&b, color, tmp, 0xf);
      b.cursor = nir_after_cf_list(&if_stmt->else_list);
      outer_if = if_stmt;
   }
   nir_store_var(&b, color, &tex->dest.ssa, 0xf);

   if (outer_if)
      b.cursor = nir_after_cf_node(&outer_if->cf_node);

   nir_ssa_def *newv = nir_load_var(&b, color);
   nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
   store->src[0] = nir_src_for_ssa(coord);
   store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
   store->src[2] = nir_src_for_ssa(newv);
   store->variables[0] = nir_deref_var_create(store, output_img);
   nir_builder_instr_insert(&b, &store->instr);
   return b.shader;
}
static void *
vc4_get_yuv_vs(struct pipe_context *pctx)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;

   if (vc4->yuv_linear_blit_vs)
      return vc4->yuv_linear_blit_vs;

   const struct nir_shader_compiler_options *options =
      pscreen->get_compiler_options(pscreen,
                                    PIPE_SHADER_IR_NIR, PIPE_SHADER_VERTEX);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
   b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs");

   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");

   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                               vec4, "gl_Position");
   pos_out->data.location = VARYING_SLOT_POS;

   nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);

   struct pipe_shader_state shader_tmpl = {
      .type = PIPE_SHADER_IR_NIR,
      .ir.nir = b.shader,
   };

   vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);

   return vc4->yuv_linear_blit_vs;
}

static void *
vc4_get_yuv_fs(struct pipe_context *pctx, int cpp)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;
   struct pipe_shader_state **cached_shader;
   const char *name;

   if (cpp == 1) {
      cached_shader = &vc4->yuv_linear_blit_fs_8bit;
      name = "linear_blit_8bit_fs";
   } else {
      cached_shader = &vc4->yuv_linear_blit_fs_16bit;
      name = "linear_blit_16bit_fs";
   }

   if (*cached_shader)
      return *cached_shader;

   const struct nir_shader_compiler_options *options =
      pscreen->get_compiler_options(pscreen,
                                    PIPE_SHADER_IR_NIR, PIPE_SHADER_FRAGMENT);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
   b.shader->info.name = ralloc_strdup(b.shader, name);

   const struct glsl_type *vec4 = glsl_vec4_type();
   const struct glsl_type *glsl_int = glsl_int_type();

   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                 vec4, "f_color");
   color_out->data.location = FRAG_RESULT_COLOR;

   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");
   pos_in->data.location = VARYING_SLOT_POS;
   nir_ssa_def *pos = nir_load_var(&b, pos_in);

   nir_ssa_def *one = nir_imm_int(&b, 1);
   nir_ssa_def *two = nir_imm_int(&b, 2);

   nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
   nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

   nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform,
                                                 glsl_int, "stride");
   nir_ssa_def *stride = nir_load_var(&b, stride_in);

   nir_ssa_def *x_offset;
   nir_ssa_def *y_offset;
   if (cpp == 1) {
      nir_ssa_def *intra_utile_x_offset =
         nir_ishl(&b, nir_iand(&b, x, one), two);
      nir_ssa_def *inter_utile_x_offset =
         nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one);

      x_offset = nir_iadd(&b, intra_utile_x_offset, inter_utile_x_offset);
      y_offset = nir_imul(&b,
                          nir_iadd(&b,
                                   nir_ishl(&b, y, one),
                                   nir_ushr(&b, nir_iand(&b, x, two), one)),
                          stride);
   } else {
      x_offset = nir_ishl(&b, x, two);
      y_offset = nir_imul(&b, y, stride);
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   load->src[0] = nir_src_for_ssa(one);
   load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset));
   nir_builder_instr_insert(&b, &load->instr);

   nir_store_var(&b, color_out,
                 nir_unpack_unorm_4x8(&b, &load->dest.ssa), 0xf);

   struct pipe_shader_state shader_tmpl = {
      .type = PIPE_SHADER_IR_NIR,
      .ir.nir = b.shader,
   };

   *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);

   return *cached_shader;
}

static bool
vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct vc4_resource *src = vc4_resource(info->src.resource);
   struct vc4_resource *dst = vc4_resource(info->dst.resource);
   bool ok;

   if (src->tiled)
      return false;
   if (src->base.format != PIPE_FORMAT_R8_UNORM &&
       src->base.format != PIPE_FORMAT_R8G8_UNORM)
      return false;

   /* YUV blits always turn raster-order to tiled */
   assert(dst->base.format == src->base.format);
   assert(dst->tiled);

   /* Always 1:1 and at the origin */
   assert(info->src.box.x == 0 && info->dst.box.x == 0);
   assert(info->src.box.y == 0 && info->dst.box.y == 0);
   assert(info->src.box.width == info->dst.box.width);
   assert(info->src.box.height == info->dst.box.height);

   if ((src->slices[info->src.level].offset & 3) ||
       (src->slices[info->src.level].stride & 3)) {
      perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n",
                 src->slices[info->src.level].offset,
                 src->slices[info->src.level].stride);
      goto fallback;
   }

   vc4_blitter_save(vc4);

   /* Create a renderable surface mapping the T-tiled shadow buffer. */
   struct pipe_surface dst_tmpl;
   util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
                                    info->dst.level, info->dst.box.z);
   dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM;
   struct pipe_surface *dst_surf =
      pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
   if (!dst_surf) {
      fprintf(stderr, "Failed to create YUV dst surface\n");
      util_blitter_unset_running_flag(vc4->blitter);
      return false;
   }
   dst_surf->width /= 2;
   if (dst->cpp == 1)
      dst_surf->height /= 2;

   /* Set the constant buffer. */
   uint32_t stride = src->slices[info->src.level].stride;
   struct pipe_constant_buffer cb_uniforms = {
      .user_buffer = &stride,
      .buffer_size = sizeof(stride),
   };
   pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb_uniforms);
   struct pipe_constant_buffer cb_src = {
      .buffer = info->src.resource,
      .buffer_offset = src->slices[info->src.level].offset,
      .buffer_size = (src->bo->size -
                      src->slices[info->src.level].offset),
   };
   pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_src);

   /* Unbind the textures, to make sure we don't try to recurse into the
    * shadow blit.
    */
   pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
   pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);

   util_blitter_custom_shader(vc4->blitter, dst_surf,
                              vc4_get_yuv_vs(pctx),
                              vc4_get_yuv_fs(pctx, src->cpp));

   util_blitter_restore_textures(vc4->blitter);
   util_blitter_restore_constant_buffer_state(vc4->blitter);
   /* Restore cb1 (util_blitter doesn't handle this one). */
   struct pipe_constant_buffer cb_disabled = { 0 };
   pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_disabled);

   pipe_surface_reference(&dst_surf, NULL);

   return true;

fallback:
   /* Do an immediate SW fallback, since the render blit path
    * would just recurse.
    */
   ok = util_try_blit_via_copy_region(pctx, info);
   assert(ok);
   (void)ok;

   return true;
}

static bool
vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
{
   struct vc4_context *vc4 = vc4_context(ctx);

   if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
      fprintf(stderr, "blit unsupported %s -> %s\n",
              util_format_short_name(info->src.resource->format),
              util_format_short_name(info->dst.resource->format));
      return false;
   }

   /* Enable the scissor, so we get a minimal set of tiles rendered. */
   if (!info->scissor_enable) {
      info->scissor_enable = true;
      info->scissor.minx = info->dst.box.x;
      info->scissor.miny = info->dst.box.y;
      info->scissor.maxx = info->dst.box.x + info->dst.box.width;
      info->scissor.maxy = info->dst.box.y + info->dst.box.height;
   }

   vc4_blitter_save(vc4);
   util_blitter_blit(vc4->blitter, info);

   return true;
}

/* Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
void
vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
   struct pipe_blit_info info = *blit_info;

   if (vc4_yuv_blit(pctx, blit_info))
      return;

   if (vc4_tile_blit(pctx, blit_info))
      return;

   if (info.mask & PIPE_MASK_S) {
      if (util_try_blit_via_copy_region(pctx, &info))
         return;

      info.mask &= ~PIPE_MASK_S;
      fprintf(stderr, "cannot blit stencil, skipping\n");
   }

   if (vc4_render_blit(pctx, &info))
      return;

   fprintf(stderr, "Unsupported blit\n");
}