static void
handle_glsl450_interpolation(struct vtn_builder *b, enum GLSLstd450 opcode,
                             const uint32_t *w, unsigned count)
{
   const struct glsl_type *dest_type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, dest_type);

   nir_intrinsic_op op;
   switch (opcode) {
   case GLSLstd450InterpolateAtCentroid:
      op = nir_intrinsic_interp_var_at_centroid;
      break;
   case GLSLstd450InterpolateAtSample:
      op = nir_intrinsic_interp_var_at_sample;
      break;
   case GLSLstd450InterpolateAtOffset:
      op = nir_intrinsic_interp_var_at_offset;
      break;
   default:
      unreachable("Invalid opcode");
   }

   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);

   nir_deref_var *deref = vtn_nir_deref(b, w[5]);
   intrin->variables[0] =
      nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));

   switch (opcode) {
   case GLSLstd450InterpolateAtCentroid:
      break;
   case GLSLstd450InterpolateAtSample:
   case GLSLstd450InterpolateAtOffset:
      intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
      break;
   default:
      unreachable("Invalid opcode");
   }

   intrin->num_components = glsl_get_vector_elements(dest_type);
   nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                     glsl_get_vector_elements(dest_type),
                     glsl_get_bit_size(dest_type), NULL);
   val->ssa->def = &intrin->dest.ssa;

   nir_builder_instr_insert(&b->nb, &intrin->instr);
}
static nir_register *
get_reg_for_deref(nir_deref_instr *deref, struct locals_to_regs_state *state)
{
   uint32_t hash = hash_deref(deref);

   assert(nir_deref_instr_get_variable(deref)->constant_initializer == NULL);

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref);
   if (entry)
      return entry->data;

   unsigned array_size = 1;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_array)
         array_size *= glsl_get_length(nir_deref_instr_parent(d)->type);
   }

   assert(glsl_type_is_vector_or_scalar(deref->type));

   nir_register *reg = nir_local_reg_create(state->builder.impl);
   reg->num_components = glsl_get_vector_elements(deref->type);
   reg->num_array_elems = array_size > 1 ? array_size : 0;
   reg->bit_size = glsl_get_bit_size(deref->type);

   _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);

   return reg;
}
static nir_register *
get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)
{
   uint32_t hash = hash_deref(deref);

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref);
   if (entry)
      return entry->data;

   unsigned array_size = 1;
   nir_deref *tail = &deref->deref;
   while (tail->child) {
      if (tail->child->deref_type == nir_deref_type_array)
         array_size *= glsl_get_length(tail->type);
      tail = tail->child;
   }

   assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type));

   nir_register *reg = nir_local_reg_create(state->impl);
   reg->num_components = glsl_get_vector_elements(tail->type);
   reg->num_array_elems = array_size > 1 ? array_size : 0;
   reg->bit_size = glsl_get_bit_size(glsl_get_base_type(tail->type));

   _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
   nir_array_add(&state->derefs_array, nir_deref_var *, deref);

   return reg;
}
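/* A minimal standalone sketch (plain C, no Mesa headers) of how both
 * versions of get_reg_for_deref above size the backing register array:
 * every array level along the deref chain multiplies into one flat
 * element count.  The lengths used here are hypothetical, not real IR. */
static unsigned
flattened_reg_size_example(void)
{
   const unsigned lengths[] = {3, 4};   /* e.g. a local "float m[3][4]" */
   unsigned array_size = 1;
   for (unsigned i = 0; i < 2; i++)
      array_size *= lengths[i];

   /* num_array_elems stays 0 for a plain, non-array local. */
   return array_size > 1 ? array_size : 0;   /* 12 */
}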
static struct vtn_ssa_value *
matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
{
   nir_ssa_def *adj_col[4];
   unsigned size = glsl_get_vector_elements(src->type);

   /* Build up an adjugate matrix */
   for (unsigned c = 0; c < size; c++) {
      nir_ssa_def *elem[4];
      for (unsigned r = 0; r < size; r++) {
         elem[r] = build_mat_subdet(&b->nb, src, size, c, r);

         if ((r + c) % 2)
            elem[r] = nir_fneg(&b->nb, elem[r]);
      }

      adj_col[c] = nir_vec(&b->nb, elem, size);
   }

   nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src));

   struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
   for (unsigned i = 0; i < size; i++)
      val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv);

   return val;
}
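/* Illustrative only (plain C, not Mesa API): the identity the function
 * above implements, A^-1 = adj(A) / det(A), where adj(A)[c][r] is the
 * cofactor (-1)^(r+c) * subdet(c, r).  Worked for a 2x2 matrix; note the
 * NIR version stores matrices as column vectors, while this sketch uses
 * ordinary row-major C arrays. */
static void
mat2_inverse_example(const float m[2][2], float out[2][2])
{
   float det = m[0][0] * m[1][1] - m[0][1] * m[1][0];
   float det_inv = 1.0f / det;       /* mirrors the nir_frcp() above */

   out[0][0] =  m[1][1] * det_inv;   /* the adjugate of [[a,b],[c,d]] */
   out[0][1] = -m[0][1] * det_inv;   /* is [[d,-b],[-c,a]]            */
   out[1][0] = -m[1][0] * det_inv;
   out[1][1] =  m[0][0] * det_inv;
}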
/* Tries to compute the size of an interface block based on the strides and
 * offsets that are provided to us in the SPIR-V source.
 */
static unsigned
vtn_type_block_size(struct vtn_type *type)
{
   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_DOUBLE: {
      unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) :
                                        glsl_get_matrix_columns(type->type);
      if (cols > 1) {
         assert(type->stride > 0);
         return type->stride * cols;
      } else if (base_type == GLSL_TYPE_DOUBLE) {
         return glsl_get_vector_elements(type->type) * 8;
      } else {
         return glsl_get_vector_elements(type->type) * 4;
      }
   }

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE: {
      unsigned size = 0;
      unsigned num_fields = glsl_get_length(type->type);
      for (unsigned f = 0; f < num_fields; f++) {
         unsigned field_end = type->offsets[f] +
                              vtn_type_block_size(type->members[f]);
         size = MAX2(size, field_end);
      }
      return size;
   }

   case GLSL_TYPE_ARRAY:
      assert(type->stride > 0);
      assert(glsl_get_length(type->type) > 0);
      return type->stride * glsl_get_length(type->type);

   default:
      assert(!"Invalid block type");
      return 0;
   }
}
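/* Illustrative only: how the struct case above resolves for a std140-style
 * block "struct { vec3 a; float b; mat4 m; }" with SPIR-V Offsets 0/12/16
 * and a MatrixStride of 16.  The block size is the largest field end, not
 * the sum of field sizes, so padding holes never double-count. */
static unsigned
block_size_example(void)
{
   unsigned offsets[3] = {0, 12, 16};
   unsigned sizes[3]   = {12,        /* vec3: 3 * 4 bytes        */
                          4,         /* float                    */
                          4 * 16};   /* mat4: 4 columns * stride */

   unsigned size = 0;
   for (unsigned f = 0; f < 3; f++) {
      unsigned field_end = offsets[f] + sizes[f];
      if (field_end > size)
         size = field_end;
   }
   return size;   /* 80 */
}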
static nir_ssa_def *
build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
{
   unsigned size = glsl_get_vector_elements(src->type);

   nir_ssa_def *cols[4];
   for (unsigned i = 0; i < size; i++)
      cols[i] = src->elems[i]->def;

   switch (size) {
   case 2:
      return build_mat2_det(&b->nb, cols);
   case 3:
      return build_mat3_det(&b->nb, cols);
   case 4:
      return build_mat4_det(&b->nb, cols);
   default:
      unreachable("Invalid matrix size");
   }
}
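/* Illustrative only: scalar versions of what the build_mat*_det helpers
 * (not shown here) emit as NIR.  Columns are passed as in the function
 * above; the 3x3 determinant is the scalar triple product
 * dot(c0, cross(c1, c2)). */
static float
mat2_det_example(const float c0[2], const float c1[2])
{
   return c0[0] * c1[1] - c1[0] * c0[1];
}

static float
mat3_det_example(const float c0[3], const float c1[3], const float c2[3])
{
   float cross[3] = {
      c1[1] * c2[2] - c2[1] * c1[2],
      c1[2] * c2[0] - c2[2] * c1[0],
      c1[0] * c2[1] - c2[0] * c1[1],
   };
   return c0[0] * cross[0] + c0[1] * cross[1] + c0[2] * cross[2];
}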
static void
_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                     nir_ssa_def *index, nir_ssa_def *offset,
                     struct vtn_ssa_value **inout, const struct glsl_type *type)
{
   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
   instr->num_components = glsl_get_vector_elements(type);

   int src = 0;
   if (!load) {
      nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1);
      instr->src[src++] = nir_src_for_ssa((*inout)->def);
   }

   /* We set the base and size for push constant load to the entire push
    * constant block for now.
    */
   if (op == nir_intrinsic_load_push_constant) {
      nir_intrinsic_set_base(instr, 0);
      nir_intrinsic_set_range(instr, 128);
   }

   if (index)
      instr->src[src++] = nir_src_for_ssa(index);

   instr->src[src++] = nir_src_for_ssa(offset);

   if (load) {
      nir_ssa_dest_init(&instr->instr, &instr->dest,
                        instr->num_components,
                        glsl_get_bit_size(glsl_get_base_type(type)), NULL);
      (*inout)->def = &instr->dest.ssa;
   }

   nir_builder_instr_insert(&b->nb, &instr->instr);

   if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
      (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
}
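/* Illustrative only: the two bit tricks used above, in plain C.  A store
 * of N components enables the low N writemask bits, and a loaded SPIR-V
 * bool is canonicalized to NIR's 0 / ~0 convention by comparing against
 * zero (the nir_ine above). */
static unsigned
full_write_mask_example(unsigned num_components)
{
   return (1u << num_components) - 1;   /* e.g. 3 components -> 0b111 */
}

static int
canonicalize_bool_example(unsigned raw)
{
   return raw != 0 ? ~0 : 0;   /* what the nir_ine with imm 0 produces */
}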
/* This function recursively walks the given deref chain and replaces the
 * given copy instruction with an equivalent sequence of load/store
 * operations.
 *
 * @copy_instr The copy instruction to replace; new instructions will be
 *             inserted before this one
 *
 * @dest_head  The head of the destination variable deref chain
 *
 * @src_head   The head of the source variable deref chain
 *
 * @dest_tail  The current tail of the destination variable deref chain;
 *             this is used for recursion and external callers of this
 *             function should call it with tail == head
 *
 * @src_tail   The current tail of the source variable deref chain;
 *             this is used for recursion and external callers of this
 *             function should call it with tail == head
 *
 * @mem_ctx    The memory context in which the new load/store
 *             instructions are allocated
 */
static void
emit_copy_load_store(nir_intrinsic_instr *copy_instr,
                     nir_deref_var *dest_head, nir_deref_var *src_head,
                     nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx)
{
   /* Find the next pair of wildcards */
   nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
   nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);

   if (src_arr_parent || dest_arr_parent) {
      /* Wildcards had better come in matched pairs */
      assert(src_arr_parent && dest_arr_parent);

      nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child);
      nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child);

      unsigned length = glsl_get_length(src_arr_parent->type);
      /* The wildcards should represent the same number of elements */
      assert(length == glsl_get_length(dest_arr_parent->type));
      assert(length > 0);

      /* Walk over all of the elements that this wildcard refers to and
       * call emit_copy_load_store on each one of them */
      src_arr->deref_array_type = nir_deref_array_type_direct;
      dest_arr->deref_array_type = nir_deref_array_type_direct;
      for (unsigned i = 0; i < length; i++) {
         src_arr->base_offset = i;
         dest_arr->base_offset = i;
         emit_copy_load_store(copy_instr, dest_head, src_head,
                              &dest_arr->deref, &src_arr->deref, mem_ctx);
      }
      src_arr->deref_array_type = nir_deref_array_type_wildcard;
      dest_arr->deref_array_type = nir_deref_array_type_wildcard;
   } else {
      /* In this case, we have no wildcards anymore, so all we have to do
       * is just emit the load and store operations. */
      src_tail = nir_deref_tail(src_tail);
      dest_tail = nir_deref_tail(dest_tail);

      assert(src_tail->type == dest_tail->type);

      unsigned num_components = glsl_get_vector_elements(src_tail->type);

      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
      load->num_components = num_components;
      load->variables[0] = nir_deref_as_var(nir_copy_deref(load,
                                                           &src_head->deref));
      nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);

      nir_instr_insert_before(&copy_instr->instr, &load->instr);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
      store->num_components = num_components;
      store->const_index[0] = (1 << num_components) - 1;
      store->variables[0] = nir_deref_as_var(nir_copy_deref(store,
                                                            &dest_head->deref));

      store->src[0].is_ssa = true;
      store->src[0].ssa = &load->dest.ssa;

      nir_instr_insert_before(&copy_instr->instr, &store->instr);
   }
}
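/* Illustrative only: the iteration order the wildcard expansion above
 * produces.  A copy of "dest[*].v[*] = src[*].v[*]" with outer length 2
 * and inner length 3 becomes 6 load/store pairs, the innermost index
 * varying fastest because each wildcard level recurses before advancing. */
static unsigned
wildcard_expansion_example(unsigned pairs[][2])
{
   unsigned n = 0;
   for (unsigned i = 0; i < 2; i++) {      /* outer wildcard */
      for (unsigned j = 0; j < 3; j++) {   /* inner wildcard */
         pairs[n][0] = i;                  /* emit: dest[i].v[j] = src[i].v[j] */
         pairs[n][1] = j;
         n++;
      }
   }
   return n;   /* 6 load/store pairs */
}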
static struct vtn_ssa_value *
matrix_multiply(struct vtn_builder *b,
                struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
{
   struct vtn_ssa_value *src0 = wrap_matrix(b, _src0);
   struct vtn_ssa_value *src1 = wrap_matrix(b, _src1);
   struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed);
   struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed);

   unsigned src0_rows = glsl_get_vector_elements(src0->type);
   unsigned src0_columns = glsl_get_matrix_columns(src0->type);
   unsigned src1_columns = glsl_get_matrix_columns(src1->type);

   const struct glsl_type *dest_type;
   if (src1_columns > 1) {
      dest_type = glsl_matrix_type(glsl_get_base_type(src0->type),
                                   src0_rows, src1_columns);
   } else {
      dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows);
   }
   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type);

   dest = wrap_matrix(b, dest);

   bool transpose_result = false;
   if (src0_transpose && src1_transpose) {
      /* transpose(A) * transpose(B) = transpose(B * A) */
      src1 = src0_transpose;
      src0 = src1_transpose;
      src0_transpose = NULL;
      src1_transpose = NULL;
      transpose_result = true;
   }

   if (src0_transpose && !src1_transpose &&
       glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
      /* We already have the rows of src0 and the columns of src1 available,
       * so we can just take the dot product of each row with each column to
       * get the result.
       */
      for (unsigned i = 0; i < src1_columns; i++) {
         nir_ssa_def *vec_src[4];
         for (unsigned j = 0; j < src0_rows; j++) {
            vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
                                          src1->elems[i]->def);
         }
         dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
      }
   } else {
      /* We don't handle the case where src1 is transposed but not src0, since
       * the general case only uses individual components of src1 so the
       * optimizer should chew through the transpose we emitted for src1.
       */
      for (unsigned i = 0; i < src1_columns; i++) {
         /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
         dest->elems[i]->def =
            nir_fmul(&b->nb, src0->elems[0]->def,
                     nir_channel(&b->nb, src1->elems[i]->def, 0));
         for (unsigned j = 1; j < src0_columns; j++) {
            dest->elems[i]->def =
               nir_fadd(&b->nb, dest->elems[i]->def,
                        nir_fmul(&b->nb, src0->elems[j]->def,
                                 nir_channel(&b->nb, src1->elems[i]->def, j)));
         }
      }
   }

   dest = unwrap_matrix(dest);

   if (transpose_result)
      dest = vtn_ssa_transpose(b, dest);

   return dest;
}
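/* Illustrative only: the general (column-major) path above in scalar C.
 * Each destination column is a linear combination of src0's columns,
 * weighted by the components of the matching src1 column; this is the
 * "dest[i] = sum(src0[j] * src1[i][j])" comment made concrete. */
static void
mat_mul_example(unsigned rows, unsigned src0_cols, unsigned src1_cols,
                const float src0[][4], const float src1[][4], float dest[][4])
{
   for (unsigned i = 0; i < src1_cols; i++) {
      for (unsigned r = 0; r < rows; r++)
         dest[i][r] = src0[0][r] * src1[i][0];
      for (unsigned j = 1; j < src0_cols; j++)
         for (unsigned r = 0; r < rows; r++)
            dest[i][r] += src0[j][r] * src1[i][j];
   }
}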
static void
validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      unsigned components_read =
         nir_intrinsic_infos[instr->intrinsic].src_components[i];
      if (components_read == 0)
         components_read = instr->num_components;

      validate_assert(state, components_read > 0);

      if (instr->src[i].is_ssa) {
         validate_assert(state,
                         components_read <= instr->src[i].ssa->num_components);
      } else if (!instr->src[i].reg.reg->is_packed) {
         validate_assert(state,
                         components_read <= instr->src[i].reg.reg->num_components);
      }

      validate_src(&instr->src[i], state);
   }

   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
   for (unsigned i = 0; i < num_vars; i++) {
      validate_deref_var(instr, instr->variables[i], state);
   }

   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
      unsigned components_written =
         nir_intrinsic_infos[instr->intrinsic].dest_components;
      if (components_written == 0)
         components_written = instr->num_components;

      validate_assert(state, components_written > 0);

      if (instr->dest.is_ssa) {
         validate_assert(state,
                         components_written <= instr->dest.ssa.num_components);
      } else if (!instr->dest.reg.reg->is_packed) {
         validate_assert(state,
                         components_written <= instr->dest.reg.reg->num_components);
      }

      validate_dest(&instr->dest, state);
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_var: {
      const struct glsl_type *type =
         nir_deref_tail(&instr->variables[0]->deref)->type;
      validate_assert(state, glsl_type_is_vector_or_scalar(type) ||
             (instr->variables[0]->var->data.mode == nir_var_uniform &&
              glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
      validate_assert(state, instr->num_components ==
                             glsl_get_vector_elements(type));
      break;
   }
   case nir_intrinsic_store_var: {
      const struct glsl_type *type =
         nir_deref_tail(&instr->variables[0]->deref)->type;
      validate_assert(state, glsl_type_is_vector_or_scalar(type) ||
             (instr->variables[0]->var->data.mode == nir_var_uniform &&
              glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
      validate_assert(state, instr->num_components ==
                             glsl_get_vector_elements(type));
      validate_assert(state,
             instr->variables[0]->var->data.mode != nir_var_shader_in &&
             instr->variables[0]->var->data.mode != nir_var_uniform &&
             instr->variables[0]->var->data.mode != nir_var_shader_storage);
      validate_assert(state, (nir_intrinsic_write_mask(instr) &
                              ~((1 << instr->num_components) - 1)) == 0);
      break;
   }
   case nir_intrinsic_copy_var:
      validate_assert(state,
             nir_deref_tail(&instr->variables[0]->deref)->type ==
             nir_deref_tail(&instr->variables[1]->deref)->type);
      validate_assert(state,
             instr->variables[0]->var->data.mode != nir_var_shader_in &&
             instr->variables[0]->var->data.mode != nir_var_uniform &&
             instr->variables[0]->var->data.mode != nir_var_shader_storage);
      break;
   default:
      break;
   }
}
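/* Illustrative only: the defaulting rule used twice above.  A zero entry
 * in the nir_intrinsic_infos table means "variable-width", so the
 * instruction's own num_components is what actually gets validated. */
static unsigned
effective_components_example(unsigned table_value, unsigned instr_components)
{
   return table_value != 0 ? table_value : instr_components;
}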
static void
_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                      nir_ssa_def *index, nir_ssa_def *offset,
                      struct vtn_access_chain *chain, unsigned chain_idx,
                      struct vtn_type *type, struct vtn_ssa_value **inout)
{
   if (chain && chain_idx >= chain->length)
      chain = NULL;

   if (load && chain == NULL && *inout == NULL)
      *inout = vtn_create_ssa_value(b, type->type);

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      /* This is where things get interesting.  At this point, we've hit
       * a vector, a scalar, or a matrix.
       */
      if (glsl_type_is_matrix(type->type)) {
         if (chain == NULL) {
            /* Loading the whole matrix */
            struct vtn_ssa_value *transpose;
            unsigned num_ops, vec_width;
            if (type->row_major) {
               num_ops = glsl_get_vector_elements(type->type);
               vec_width = glsl_get_matrix_columns(type->type);
               if (load) {
                  const struct glsl_type *transpose_type =
                     glsl_matrix_type(base_type, vec_width, num_ops);
                  *inout = vtn_create_ssa_value(b, transpose_type);
               } else {
                  transpose = vtn_ssa_transpose(b, *inout);
                  inout = &transpose;
               }
            } else {
               num_ops = glsl_get_matrix_columns(type->type);
               vec_width = glsl_get_vector_elements(type->type);
            }

            for (unsigned i = 0; i < num_ops; i++) {
               nir_ssa_def *elem_offset =
                  nir_iadd(&b->nb, offset,
                           nir_imm_int(&b->nb, i * type->stride));
               _vtn_load_store_tail(b, op, load, index, elem_offset,
                                    &(*inout)->elems[i],
                                    glsl_vector_type(base_type, vec_width));
            }

            if (load && type->row_major)
               *inout = vtn_ssa_transpose(b, *inout);
         } else if (type->row_major) {
            /* Row-major but with an access chain. */
            nir_ssa_def *col_offset =
               vtn_access_link_as_ssa(b, chain->link[chain_idx],
                                      type->array_element->stride);
            offset = nir_iadd(&b->nb, offset, col_offset);

            if (chain_idx + 1 < chain->length) {
               /* Picking off a single element */
               nir_ssa_def *row_offset =
                  vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
                                         type->stride);
               offset = nir_iadd(&b->nb, offset, row_offset);
               if (load)
                  *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
               _vtn_load_store_tail(b, op, load, index, offset, inout,
                                    glsl_scalar_type(base_type));
            } else {
               /* Grabbing a column; picking one element off each row */
               unsigned num_comps = glsl_get_vector_elements(type->type);
               const struct glsl_type *column_type =
                  glsl_get_column_type(type->type);

               nir_ssa_def *comps[4];
               for (unsigned i = 0; i < num_comps; i++) {
                  nir_ssa_def *elem_offset =
                     nir_iadd(&b->nb, offset,
                              nir_imm_int(&b->nb, i * type->stride));

                  struct vtn_ssa_value *comp, temp_val;
                  if (!load) {
                     temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
                     temp_val.type = glsl_scalar_type(base_type);
                  }
                  comp = &temp_val;
                  _vtn_load_store_tail(b, op, load, index, elem_offset,
                                       &comp, glsl_scalar_type(base_type));
                  comps[i] = comp->def;
               }

               if (load) {
                  if (*inout == NULL)
                     *inout = vtn_create_ssa_value(b, column_type);

                  (*inout)->def = nir_vec(&b->nb, comps, num_comps);
               }
            }
         } else {
            /* Column-major with a deref.  Fall through to array case. */
            nir_ssa_def *col_offset =
               vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
            offset = nir_iadd(&b->nb, offset, col_offset);
            _vtn_block_load_store(b, op, load, index, offset, chain,
                                  chain_idx + 1, type->array_element, inout);
         }
      } else if (chain == NULL) {
         /* Single whole vector */
         assert(glsl_type_is_vector_or_scalar(type->type));
         _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
      } else {
         /* Single component of a vector.  Fall through to array case. */
         nir_ssa_def *elem_offset =
            vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
         offset = nir_iadd(&b->nb, offset, elem_offset);
         _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
                               type->array_element, inout);
      }
      return;

   case GLSL_TYPE_ARRAY: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
                               type->array_element, &(*inout)->elems[i]);
      }
      return;
   }

   case GLSL_TYPE_STRUCT: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
                               type->members[i], &(*inout)->elems[i]);
      }
      return;
   }

   default:
      unreachable("Invalid block member type");
   }
}
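/* Illustrative only: the offset arithmetic the matrix paths above emit,
 * in plain C.  For a column-major matrix, the per-link stride is the
 * column stride; for a row-major one, stepping to the next row of a
 * column costs the matrix (row) stride, which is why grabbing a column of
 * a row-major matrix touches num_rows strided scalars. */
static unsigned
matrix_elem_offset_example(int row_major, unsigned col, unsigned row,
                           unsigned matrix_stride, unsigned elem_size)
{
   if (row_major)
      return row * matrix_stride + col * elem_size;
   else
      return col * matrix_stride + row * elem_size;
}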
static void
_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
                      nir_deref *tail, struct vtn_ssa_value *inout)
{
   /* The deref tail may contain a deref to select a component of a vector (in
    * other words, it might not be an actual tail) so we have to save it away
    * here since we overwrite it later.
    */
   nir_deref *old_child = tail->child;

   if (glsl_type_is_vector_or_scalar(tail->type)) {
      /* Terminate the deref chain in case there is one more link to pick
       * off a component of the vector.
       */
      tail->child = NULL;

      nir_intrinsic_op op = load ? nir_intrinsic_load_var :
                                   nir_intrinsic_store_var;

      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
      intrin->variables[0] =
         nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
      intrin->num_components = glsl_get_vector_elements(tail->type);

      if (load) {
         nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                           intrin->num_components,
                           glsl_get_bit_size(glsl_get_base_type(tail->type)),
                           NULL);
         inout->def = &intrin->dest.ssa;
      } else {
         nir_intrinsic_set_write_mask(intrin,
                                      (1 << intrin->num_components) - 1);
         intrin->src[0] = nir_src_for_ssa(inout->def);
      }

      nir_builder_instr_insert(&b->nb, &intrin->instr);
   } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
              glsl_type_is_matrix(tail->type)) {
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_array *deref_arr = nir_deref_array_create(b);
      deref_arr->deref_array_type = nir_deref_array_type_direct;
      deref_arr->deref.type = glsl_get_array_element(tail->type);
      tail->child = &deref_arr->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_arr->base_offset = i;
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   } else {
      assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
      tail->child = &deref_struct->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_struct->index = i;
         deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   }

   tail->child = old_child;
}
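/* Illustrative only: the recursion shape above as a type walk in plain C.
 * A composite is split until vectors/scalars remain, and each leaf becomes
 * exactly one load_var/store_var intrinsic.  The toy type tree here is
 * hypothetical (homogeneous children, for brevity), not Mesa's glsl_type. */
struct toy_type {
   int is_vector;                 /* leaf: emit one intrinsic       */
   unsigned num_children;         /* composite: recurse per element */
   const struct toy_type *child;  /* homogeneous children           */
};

static unsigned
count_leaf_intrinsics_example(const struct toy_type *t)
{
   if (t->is_vector)
      return 1;

   unsigned n = 0;
   for (unsigned i = 0; i < t->num_children; i++)
      n += count_leaf_intrinsics_example(t->child);
   return n;   /* e.g. mat3 -> 3 column loads, float[2][4] -> 8 */
}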