static void
validate_deref_chain(nir_deref *deref, nir_variable_mode mode,
                     validate_state *state)
{
   validate_assert(state, deref->child == NULL ||
                   ralloc_parent(deref->child) == deref);

   nir_deref *parent = NULL;
   while (deref != NULL) {
      switch (deref->deref_type) {
      case nir_deref_type_array:
         if (mode == nir_var_shared) {
            /* Shared variables have a bit more relaxed rules because we need
             * to be able to handle array derefs on vectors. Fortunately,
             * nir_lower_io handles these just fine.
             */
            validate_assert(state, glsl_type_is_array(parent->type) ||
                                   glsl_type_is_matrix(parent->type) ||
                                   glsl_type_is_vector(parent->type));
         } else {
            /* Most of NIR cannot handle array derefs on vectors */
            validate_assert(state, glsl_type_is_array(parent->type) ||
                                   glsl_type_is_matrix(parent->type));
         }
         validate_assert(state,
                         deref->type == glsl_get_array_element(parent->type));

         if (nir_deref_as_array(deref)->deref_array_type ==
             nir_deref_array_type_indirect)
            validate_src(&nir_deref_as_array(deref)->indirect, state, 32, 1);
         break;

      case nir_deref_type_struct:
         assume(parent); /* cannot happen: deref chain starts w/ nir_deref_var */
         validate_assert(state, deref->type ==
                         glsl_get_struct_field(parent->type,
                                               nir_deref_as_struct(deref)->index));
         break;

      case nir_deref_type_var:
         break;

      default:
         validate_assert(state, !"Invalid deref type");
         break;
      }

      parent = deref;
      deref = deref->child;
   }
}
const glsl_type *
glsl_transposed_type(const struct glsl_type *type)
{
   assert(glsl_type_is_matrix(type));
   return glsl_type::get_instance(type->base_type, type->matrix_columns,
                                  type->vector_elements);
}
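/* Usage sketch (illustrative, not from the source): get_instance() takes
 * (base_type, rows, columns), so swapping vector_elements and matrix_columns
 * produces the transposed matrix type. A GLSL mat2x3 (2 columns of 3 rows)
 * therefore maps to mat3x2:
 *
 *    const glsl_type *t = glsl_transposed_type(glsl_type::mat2x3_type);
 *    assert(t == glsl_type::mat3x2_type);
 */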
static struct vtn_ssa_value *
unwrap_matrix(struct vtn_ssa_value *val)
{
   if (glsl_type_is_matrix(val->type))
      return val;

   return val->elems[0];
}
static struct vtn_ssa_value *
wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val)
{
   if (val == NULL)
      return NULL;

   if (glsl_type_is_matrix(val->type))
      return val;

   struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
   dest->type = val->type;
   dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1);
   dest->elems[0] = val;

   return dest;
}
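/* Composition sketch (illustrative): wrap_matrix() boxes a non-matrix value
 * as a one-element "matrix" so the matrix code paths can treat all operands
 * uniformly, and unwrap_matrix() undoes the boxing:
 *
 *    struct vtn_ssa_value *boxed = wrap_matrix(b, vec_val);
 *    assert(unwrap_matrix(boxed) == vec_val);   // vec_val is hypothetical
 */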
static bool
split_var_copies_block(nir_block *block, void *void_state)
{
   struct split_var_copies_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
      if (intrinsic->intrinsic != nir_intrinsic_copy_var)
         continue;

      nir_deref *dest_head = &intrinsic->variables[0]->deref;
      nir_deref *src_head = &intrinsic->variables[1]->deref;
      nir_deref *dest_tail = get_deref_tail(dest_head);
      nir_deref *src_tail = get_deref_tail(src_head);

      switch (glsl_get_base_type(src_tail->type)) {
      case GLSL_TYPE_ARRAY:
      case GLSL_TYPE_STRUCT:
         split_var_copy_instr(intrinsic, dest_head, src_head,
                              dest_tail, src_tail, state);
         nir_instr_remove(&intrinsic->instr);
         ralloc_steal(state->dead_ctx, instr);
         break;

      case GLSL_TYPE_FLOAT:
      case GLSL_TYPE_INT:
      case GLSL_TYPE_UINT:
      case GLSL_TYPE_BOOL:
         if (glsl_type_is_matrix(src_tail->type)) {
            split_var_copy_instr(intrinsic, dest_head, src_head,
                                 dest_tail, src_tail, state);
            nir_instr_remove(&intrinsic->instr);
            ralloc_steal(state->dead_ctx, instr);
         }
         break;

      default:
         unreachable("Invalid type");
         break;
      }
   }

   return true;
}
void
vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
               const uint32_t *w, unsigned count)
{
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   const struct glsl_type *type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   vtn_foreach_decoration(b, val, handle_no_contraction, NULL);

   /* Collect the various SSA sources */
   const unsigned num_inputs = count - 3;
   struct vtn_ssa_value *vtn_src[4] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++)
      vtn_src[i] = vtn_ssa_value(b, w[i + 3]);

   if (glsl_type_is_matrix(vtn_src[0]->type) ||
       (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) {
      vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]);
      b->nb.exact = false;
      return;
   }

   val->ssa = vtn_create_ssa_value(b, type);
   nir_ssa_def *src[4] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++) {
      assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type));
      src[i] = vtn_src[i]->def;
   }

   switch (opcode) {
   case SpvOpAny:
      if (src[0]->num_components == 1) {
         val->ssa->def = nir_imov(&b->nb, src[0]);
      } else {
         nir_op op;
         switch (src[0]->num_components) {
         case 2:  op = nir_op_bany_inequal2; break;
         case 3:  op = nir_op_bany_inequal3; break;
         case 4:  op = nir_op_bany_inequal4; break;
         default: unreachable("invalid number of components");
         }
         val->ssa->def = nir_build_alu(&b->nb, op, src[0],
                                       nir_imm_int(&b->nb, NIR_FALSE),
                                       NULL, NULL);
      }
      break;

   case SpvOpAll:
      if (src[0]->num_components == 1) {
         val->ssa->def = nir_imov(&b->nb, src[0]);
      } else {
         nir_op op;
         switch (src[0]->num_components) {
         case 2:  op = nir_op_ball_iequal2; break;
         case 3:  op = nir_op_ball_iequal3; break;
         case 4:  op = nir_op_ball_iequal4; break;
         default: unreachable("invalid number of components");
         }
         val->ssa->def = nir_build_alu(&b->nb, op, src[0],
                                       nir_imm_int(&b->nb, NIR_TRUE),
                                       NULL, NULL);
      }
      break;

   case SpvOpOuterProduct: {
      for (unsigned i = 0; i < src[1]->num_components; i++) {
         val->ssa->elems[i]->def =
            nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i));
      }
      break;
   }

   case SpvOpDot:
      val->ssa->def = nir_fdot(&b->nb, src[0], src[1]);
      break;

   case SpvOpIAddCarry:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]);
      break;

   case SpvOpISubBorrow:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]);
      break;

   case SpvOpUMulExtended:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]);
      break;

   case SpvOpSMulExtended:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]);
      break;

   case SpvOpFwidth:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy(&b->nb, src[0])));
      break;
   case SpvOpFwidthFine:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0])));
      break;
   case SpvOpFwidthCoarse:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0])));
      break;

   case SpvOpVectorTimesScalar:
      /* The builder will take care of splatting for us. */
      val->ssa->def = nir_fmul(&b->nb, src[0], src[1]);
      break;

   case SpvOpIsNan:
      val->ssa->def = nir_fne(&b->nb, src[0], src[0]);
      break;

   case SpvOpIsInf:
      val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]),
                              nir_imm_float(&b->nb, INFINITY));
      break;

   case SpvOpFUnordEqual:
   case SpvOpFUnordNotEqual:
   case SpvOpFUnordLessThan:
   case SpvOpFUnordGreaterThan:
   case SpvOpFUnordLessThanEqual:
   case SpvOpFUnordGreaterThanEqual: {
      bool swap;
      nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
      nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);

      nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap,
                                                  src_alu_type, dst_alu_type);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      val->ssa->def =
         nir_ior(&b->nb,
                 nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
                 nir_ior(&b->nb,
                         nir_fne(&b->nb, src[0], src[0]),
                         nir_fne(&b->nb, src[1], src[1])));
      break;
   }

   case SpvOpFOrdEqual:
   case SpvOpFOrdNotEqual:
   case SpvOpFOrdLessThan:
   case SpvOpFOrdGreaterThan:
   case SpvOpFOrdLessThanEqual:
   case SpvOpFOrdGreaterThanEqual: {
      bool swap;
      nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
      nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);

      nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap,
                                                  src_alu_type, dst_alu_type);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      val->ssa->def =
         nir_iand(&b->nb,
                  nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
                  nir_iand(&b->nb,
                           nir_feq(&b->nb, src[0], src[0]),
                           nir_feq(&b->nb, src[1], src[1])));
      break;
   }

   default: {
      bool swap;
      nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
      nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);

      nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap,
                                                  src_alu_type, dst_alu_type);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
      break;
   } /* default */
   }

   b->nb.exact = false;
}
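/* Side note on the derivative cases above: the SpvOpFwidth* lowerings follow
 * the GLSL definition fwidth(p) = abs(dFdx(p)) + abs(dFdy(p)), and SpvOpIsNan
 * relies on the IEEE rule that NaN != NaN, so nir_fne(x, x) is true exactly
 * in the NaN lanes.
 */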
/* Recursively constructs deref chains to split a copy instruction into
 * multiple (if needed) copy instructions with full-length deref chains.
 * External callers of this function should pass the tail and head of the
 * deref chains found as the source and destination of the copy instruction
 * into this function.
 *
 * \param old_copy  The copy instruction we are splitting
 * \param dest_head The head of the destination deref chain we are building
 * \param src_head  The head of the source deref chain we are building
 * \param dest_tail The tail of the destination deref chain we are building
 * \param src_tail  The tail of the source deref chain we are building
 * \param state     The current split_var_copies_state object
 */
static void
split_var_copy_instr(nir_intrinsic_instr *old_copy,
                     nir_deref *dest_head, nir_deref *src_head,
                     nir_deref *dest_tail, nir_deref *src_tail,
                     struct split_var_copies_state *state)
{
   assert(src_tail->type == dest_tail->type);

   /* Make sure these really are the tails of the deref chains */
   assert(dest_tail->child == NULL);
   assert(src_tail->child == NULL);

   switch (glsl_get_base_type(src_tail->type)) {
   case GLSL_TYPE_ARRAY: {
      /* Make a wildcard dereference */
      nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
      deref->deref.type = glsl_get_array_element(src_tail->type);
      deref->deref_array_type = nir_deref_array_type_wildcard;

      /* Set the tail of both as the newly created wildcard deref. It is
       * safe to use the same wildcard in both places because a) we will be
       * copying it before we put it in an actual instruction and b)
       * everything that will potentially add another link in the deref
       * chain will also add the same thing to both chains.
       */
      src_tail->child = &deref->deref;
      dest_tail->child = &deref->deref;

      split_var_copy_instr(old_copy, dest_head, src_head,
                           dest_tail->child, src_tail->child, state);

      /* Set it back to the way we found it */
      src_tail->child = NULL;
      dest_tail->child = NULL;
      break;
   }

   case GLSL_TYPE_STRUCT:
      /* This is the only part that actually does any interesting
       * splitting. For array types, we just use wildcards and resolve
       * them later. For structure types, we need to emit one copy
       * instruction for every structure element. Because we may have
       * structs inside structs, we just recurse and let the next level
       * take care of any additional structures.
       */
      for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) {
         nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i);
         deref->deref.type = glsl_get_struct_field(src_tail->type, i);

         /* Set the tail of both as the newly created structure deref. It
          * is safe to use the same deref in both places because a) we
          * will be copying it before we put it in an actual instruction
          * and b) everything that will potentially add another link in the
          * deref chain will also add the same thing to both chains.
          */
         src_tail->child = &deref->deref;
         dest_tail->child = &deref->deref;

         split_var_copy_instr(old_copy, dest_head, src_head,
                              dest_tail->child, src_tail->child, state);
      }

      /* Set it back to the way we found it */
      src_tail->child = NULL;
      dest_tail->child = NULL;
      break;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (glsl_type_is_matrix(src_tail->type)) {
         nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
         deref->deref.type = glsl_get_column_type(src_tail->type);
         deref->deref_array_type = nir_deref_array_type_wildcard;

         /* Set the tail of both as the newly created wildcard deref. It
          * is safe to use the same wildcard in both places because a) we
          * will be copying it before we put it in an actual instruction
          * and b) everything that will potentially add another link in the
          * deref chain will also add the same thing to both chains.
          */
         src_tail->child = &deref->deref;
         dest_tail->child = &deref->deref;

         split_var_copy_instr(old_copy, dest_head, src_head,
                              dest_tail->child, src_tail->child, state);

         /* Set it back to the way we found it */
         src_tail->child = NULL;
         dest_tail->child = NULL;
      } else {
         /* At this point, we have fully built our deref chains and can
          * actually add the new copy instruction.
          */
         nir_intrinsic_instr *new_copy =
            nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var);

         /* We need to make copies because a) this deref chain actually
          * belongs to the copy instruction and b) the deref chains may
          * have some of the same links due to the way we constructed them
          */
         nir_deref *src = nir_copy_deref(new_copy, src_head);
         nir_deref *dest = nir_copy_deref(new_copy, dest_head);

         new_copy->variables[0] = nir_deref_as_var(dest);
         new_copy->variables[1] = nir_deref_as_var(src);

         /* Emit the copy instruction after the old instruction. We'll
          * remove the old one later.
          */
         nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
         state->progress = true;
      }
      break;

   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
   case GLSL_TYPE_ATOMIC_UINT:
   case GLSL_TYPE_INTERFACE:
   default:
      unreachable("Cannot copy these types");
   }
}
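/* Splitting sketch (illustrative, with a hypothetical variable type): given
 *
 *    struct S { float a[4]; vec3 b; };
 *
 * a single "copy_var dst, src" of type S becomes one copy per leaf member,
 * with wildcards standing in for the array indices:
 *
 *    copy_var dst.a[*], src.a[*]
 *    copy_var dst.b,    src.b
 */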
/**
 * Try to mark a portion of the given varying as used. Caller must ensure
 * that the variable represents a shader input or output.
 *
 * If the index can't be interpreted as a constant, or some other problem
 * occurs, then nothing will be marked and false will be returned.
 */
static bool
try_mask_partial_io(nir_shader *shader, nir_variable *var,
                    nir_deref_instr *deref, bool is_output_read)
{
   const struct glsl_type *type = var->type;

   if (nir_is_per_vertex_io(var, shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* The code below only handles:
    *
    * - Indexing into matrices
    * - Indexing into arrays of (arrays, matrices, vectors, or scalars)
    *
    * For now, we just give up if we see varying structs and arrays of
    * structs here and mark the entire variable as used.
    */
   if (!(glsl_type_is_matrix(type) ||
         (glsl_type_is_array(type) && !var->data.compact &&
          (glsl_type_is_numeric(glsl_without_array(type)) ||
           glsl_type_is_boolean(glsl_without_array(type)))))) {
      /* If we don't know how to handle this case, give up and let the
       * caller mark the whole variable as used.
       */
      return false;
   }

   unsigned offset = get_io_offset(deref, false);
   if (offset == -1)
      return false;

   unsigned num_elems;
   unsigned elem_width = 1;
   unsigned mat_cols = 1;
   if (glsl_type_is_array(type)) {
      num_elems = glsl_get_aoa_size(type);
      if (glsl_type_is_matrix(glsl_without_array(type)))
         mat_cols = glsl_get_matrix_columns(glsl_without_array(type));
   } else {
      num_elems = glsl_get_matrix_columns(type);
   }

   /* double the element width for double types that take two slots */
   if (glsl_type_is_dual_slot(glsl_without_array(type)))
      elem_width *= 2;

   if (offset >= num_elems * elem_width * mat_cols) {
      /* Constant index outside the bounds of the matrix/array. This could
       * arise as a result of constant folding of a legal GLSL program.
       *
       * Even though the spec says that indexing outside the bounds of a
       * matrix/array results in undefined behaviour, we don't want to pass
       * out-of-range values to set_io_mask() (since this could result in
       * slots that don't exist being marked as used), so just let the
       * caller mark the whole variable as used.
       */
      return false;
   }

   set_io_mask(shader, var, offset, elem_width, is_output_read);
   return true;
}
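/* Worked example (a sketch, not from the source): for a varying declared as
 * dmat2x3 v[2], the bounds check above sees num_elems = 2 (array length),
 * mat_cols = 2, and elem_width = 2 because each dvec3 column is dual-slot,
 * so any constant offset of 2 * 2 * 2 = 8 or more slots is out of range and
 * the caller marks the whole variable instead.
 */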
static void
_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                      nir_ssa_def *index, nir_ssa_def *offset,
                      struct vtn_access_chain *chain, unsigned chain_idx,
                      struct vtn_type *type, struct vtn_ssa_value **inout)
{
   if (chain && chain_idx >= chain->length)
      chain = NULL;

   if (load && chain == NULL && *inout == NULL)
      *inout = vtn_create_ssa_value(b, type->type);

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      /* This is where things get interesting. At this point, we've hit
       * a vector, a scalar, or a matrix.
       */
      if (glsl_type_is_matrix(type->type)) {
         if (chain == NULL) {
            /* Loading the whole matrix */
            struct vtn_ssa_value *transpose;
            unsigned num_ops, vec_width;
            if (type->row_major) {
               num_ops = glsl_get_vector_elements(type->type);
               vec_width = glsl_get_matrix_columns(type->type);
               if (load) {
                  const struct glsl_type *transpose_type =
                     glsl_matrix_type(base_type, vec_width, num_ops);
                  *inout = vtn_create_ssa_value(b, transpose_type);
               } else {
                  transpose = vtn_ssa_transpose(b, *inout);
                  inout = &transpose;
               }
            } else {
               num_ops = glsl_get_matrix_columns(type->type);
               vec_width = glsl_get_vector_elements(type->type);
            }

            for (unsigned i = 0; i < num_ops; i++) {
               nir_ssa_def *elem_offset =
                  nir_iadd(&b->nb, offset,
                           nir_imm_int(&b->nb, i * type->stride));
               _vtn_load_store_tail(b, op, load, index, elem_offset,
                                    &(*inout)->elems[i],
                                    glsl_vector_type(base_type, vec_width));
            }

            if (load && type->row_major)
               *inout = vtn_ssa_transpose(b, *inout);
         } else if (type->row_major) {
            /* Row-major but with an access chain. */
            nir_ssa_def *col_offset =
               vtn_access_link_as_ssa(b, chain->link[chain_idx],
                                      type->array_element->stride);
            offset = nir_iadd(&b->nb, offset, col_offset);

            if (chain_idx + 1 < chain->length) {
               /* Picking off a single element */
               nir_ssa_def *row_offset =
                  vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
                                         type->stride);
               offset = nir_iadd(&b->nb, offset, row_offset);
               if (load)
                  *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
               _vtn_load_store_tail(b, op, load, index, offset, inout,
                                    glsl_scalar_type(base_type));
            } else {
               /* Grabbing a column; picking one element off each row */
               unsigned num_comps = glsl_get_vector_elements(type->type);
               const struct glsl_type *column_type =
                  glsl_get_column_type(type->type);
               nir_ssa_def *comps[4];
               for (unsigned i = 0; i < num_comps; i++) {
                  nir_ssa_def *elem_offset =
                     nir_iadd(&b->nb, offset,
                              nir_imm_int(&b->nb, i * type->stride));

                  struct vtn_ssa_value *comp, temp_val;
                  if (!load) {
                     temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
                     temp_val.type = glsl_scalar_type(base_type);
                  }
                  comp = &temp_val;
                  _vtn_load_store_tail(b, op, load, index, elem_offset,
                                       &comp, glsl_scalar_type(base_type));
                  comps[i] = comp->def;
               }

               if (load) {
                  if (*inout == NULL)
                     *inout = vtn_create_ssa_value(b, column_type);

                  (*inout)->def = nir_vec(&b->nb, comps, num_comps);
               }
            }
         } else {
            /* Column-major with a deref. Fall through to array case. */
            nir_ssa_def *col_offset =
               vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
            offset = nir_iadd(&b->nb, offset, col_offset);
            _vtn_block_load_store(b, op, load, index, offset, chain,
                                  chain_idx + 1, type->array_element, inout);
         }
      } else if (chain == NULL) {
         /* Single whole vector */
         assert(glsl_type_is_vector_or_scalar(type->type));
         _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
      } else {
         /* Single component of a vector. Fall through to array case. */
         nir_ssa_def *elem_offset =
            vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
         offset = nir_iadd(&b->nb, offset, elem_offset);
         _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
                               type->array_element, inout);
      }
      return;

   case GLSL_TYPE_ARRAY: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
                               type->array_element, &(*inout)->elems[i]);
      }
      return;
   }

   case GLSL_TYPE_STRUCT: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
                               type->members[i], &(*inout)->elems[i]);
      }
      return;
   }

   default:
      unreachable("Invalid block member type");
   }
}
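/* Addressing sketch for the row-major paths above (illustrative): element
 * (row r, column c) of a row-major matrix lives at
 *
 *    base + r * type->stride + c * type->array_element->stride
 *
 * where type->stride is the row (matrix) stride. Loading a whole column
 * therefore gathers one scalar per row and nir_vec()s them together, which
 * is exactly what the "grabbing a column" loop does.
 */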
static void
_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
                      nir_deref *tail, struct vtn_ssa_value *inout)
{
   /* The deref tail may contain a deref to select a component of a vector
    * (in other words, it might not be an actual tail) so we have to save
    * it away here since we overwrite it later.
    */
   nir_deref *old_child = tail->child;

   if (glsl_type_is_vector_or_scalar(tail->type)) {
      /* Terminate the deref chain in case there is one more link to pick
       * off a component of the vector.
       */
      tail->child = NULL;

      nir_intrinsic_op op = load ? nir_intrinsic_load_var :
                                   nir_intrinsic_store_var;

      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
      intrin->variables[0] =
         nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
      intrin->num_components = glsl_get_vector_elements(tail->type);

      if (load) {
         nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                           intrin->num_components,
                           glsl_get_bit_size(glsl_get_base_type(tail->type)),
                           NULL);
         inout->def = &intrin->dest.ssa;
      } else {
         nir_intrinsic_set_write_mask(intrin,
                                      (1 << intrin->num_components) - 1);
         intrin->src[0] = nir_src_for_ssa(inout->def);
      }

      nir_builder_instr_insert(&b->nb, &intrin->instr);
   } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
              glsl_type_is_matrix(tail->type)) {
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_array *deref_arr = nir_deref_array_create(b);
      deref_arr->deref_array_type = nir_deref_array_type_direct;
      deref_arr->deref.type = glsl_get_array_element(tail->type);
      tail->child = &deref_arr->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_arr->base_offset = i;
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   } else {
      assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
      tail->child = &deref_struct->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_struct->index = i;
         deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   }

   tail->child = old_child;
}
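/* Recursion sketch (illustrative): loading a local mat2 variable m bottoms
 * out in one load_var per column, with the direct array deref re-pointed at
 * each column in turn:
 *
 *    vec2 ssa_0 = intrinsic load_var (m[0])
 *    vec2 ssa_1 = intrinsic load_var (m[1])
 */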