Example #1
static unsigned
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return glsl_get_components(type);
   case GLSL_TYPE_ARRAY:
      return type_size(glsl_get_array_element(type)) * glsl_get_length(type);
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < glsl_get_length(type); i++) {
         size += type_size(glsl_get_struct_field(type, i));
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      return 0;
   case GLSL_TYPE_ATOMIC_UINT:
      return 0;
   case GLSL_TYPE_INTERFACE:
      return 0;
   case GLSL_TYPE_IMAGE:
      return 0;
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
   case GLSL_TYPE_DOUBLE:
      unreachable("not reached");
   }

   return 0;
}
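A quick worked size for a hypothetical struct { vec4 a; float b[3]; } under the recursion above (counting components, not bytes):

\[ \text{type\_size} = 4 + 3 \times 1 = 7 \]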
Example #2
static nir_register *
get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)
{
   uint32_t hash = hash_deref(deref);

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref);
   if (entry)
      return entry->data;

   unsigned array_size = 1;
   nir_deref *tail = &deref->deref;
   while (tail->child) {
      if (tail->child->deref_type == nir_deref_type_array)
         array_size *= glsl_get_length(tail->type);
      tail = tail->child;
   }

   assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type));

   nir_register *reg = nir_local_reg_create(state->impl);
   reg->num_components = glsl_get_vector_elements(tail->type);
   reg->num_array_elems = array_size > 1 ? array_size : 0;
   reg->bit_size = glsl_get_bit_size(glsl_get_base_type(tail->type));

   _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
   nir_array_add(&state->derefs_array, nir_deref_var *, deref);

   return reg;
}
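For illustration (hypothetical deref, not taken from the code): a local float v[4][2] reached through v -> [i] -> [j] walks two array links, so

\[ \text{array\_size} = 4 \times 2 = 8 \]

and the tail type is float, i.e. the resulting register gets num_components = 1 and num_array_elems = 8.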
Example #3
const glsl_type *
glsl_channel_type(const glsl_type *t)
{
   switch (glsl_get_base_type(t)) {
   case GLSL_TYPE_ARRAY: {
      const glsl_type *base = glsl_channel_type(glsl_get_array_element(t));
      return glsl_array_type(base, glsl_get_length(t));
   }
   case GLSL_TYPE_UINT:
      return glsl_uint_type();
   case GLSL_TYPE_INT:
      return glsl_int_type();
   case GLSL_TYPE_FLOAT:
      return glsl_float_type();
   case GLSL_TYPE_BOOL:
      return glsl_bool_type();
   case GLSL_TYPE_DOUBLE:
      return glsl_double_type();
   case GLSL_TYPE_UINT64:
      return glsl_uint64_t_type();
   case GLSL_TYPE_INT64:
      return glsl_int64_t_type();
   case GLSL_TYPE_FLOAT16:
      return glsl_float16_t_type();
   case GLSL_TYPE_UINT16:
      return glsl_uint16_t_type();
   case GLSL_TYPE_INT16:
      return glsl_int16_t_type();
   default:
      unreachable("Unhandled base type glsl_channel_type()");
   }
}
Example #4
nir_ssa_def *
vtn_access_chain_to_offset(struct vtn_builder *b,
                           struct vtn_access_chain *chain,
                           nir_ssa_def **index_out, struct vtn_type **type_out,
                           unsigned *end_idx_out, bool stop_at_matrix)
{
   unsigned idx = 0;
   struct vtn_type *type;
   *index_out = get_vulkan_resource_index(b, chain, &type, &idx);

   nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
   for (; idx < chain->length; idx++) {
      enum glsl_base_type base_type = glsl_get_base_type(type->type);
      switch (base_type) {
      case GLSL_TYPE_UINT:
      case GLSL_TYPE_INT:
      case GLSL_TYPE_FLOAT:
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_BOOL:
         /* Some users may not want matrix or vector derefs */
         if (stop_at_matrix)
            goto end;
         /* Fall through */

      case GLSL_TYPE_ARRAY:
         offset = nir_iadd(&b->nb, offset,
                           vtn_access_link_as_ssa(b, chain->link[idx],
                                                  type->stride));

         type = type->array_element;
         break;

      case GLSL_TYPE_STRUCT: {
         assert(chain->link[idx].mode == vtn_access_mode_literal);
         unsigned member = chain->link[idx].id;
         offset = nir_iadd(&b->nb, offset,
                           nir_imm_int(&b->nb, type->offsets[member]));
         type = type->members[member];
         break;
      }

      default:
         unreachable("Invalid type for deref");
      }
   }

end:
   *type_out = type;
   if (end_idx_out)
      *end_idx_out = idx;

   return offset;
}
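Worked example with hypothetical strides and offsets: a literal struct member with offsets[1] = 16 followed by an array index of 2 with stride = 12 accumulates

\[ \text{offset} = 0 + 16 + 2 \times 12 = 40 \]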
Example #5
static void
_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                     nir_ssa_def *index, nir_ssa_def *offset,
                     struct vtn_ssa_value **inout, const struct glsl_type *type)
{
   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
   instr->num_components = glsl_get_vector_elements(type);

   int src = 0;
   if (!load) {
      nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1);
      instr->src[src++] = nir_src_for_ssa((*inout)->def);
   }

   /* We set the base and size for push constant load to the entire push
    * constant block for now.
    */
   if (op == nir_intrinsic_load_push_constant) {
      nir_intrinsic_set_base(instr, 0);
      nir_intrinsic_set_range(instr, 128);
   }

   if (index)
      instr->src[src++] = nir_src_for_ssa(index);

   instr->src[src++] = nir_src_for_ssa(offset);

   if (load) {
      nir_ssa_dest_init(&instr->instr, &instr->dest,
                        instr->num_components,
                        glsl_get_bit_size(glsl_get_base_type(type)), NULL);
      (*inout)->def = &instr->dest.ssa;
   }

   nir_builder_instr_insert(&b->nb, &instr->instr);

   if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
      (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
}
Example #6
static void
_vtn_variable_load_store(struct vtn_builder *b, bool load,
                         struct vtn_access_chain *chain,
                         struct vtn_type *tail_type,
                         struct vtn_ssa_value **inout)
{
   enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      /* At this point, we have a scalar, vector, or matrix so we know that
       * there cannot be any structure splitting still in the way.  By
       * stopping at the matrix level rather than the vector level, we
       * ensure that matrices get loaded in the optimal way even if they
       * are stored row-major in a UBO.
       */
      if (load) {
         *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain));
      } else {
         vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain));
      }
      return;

   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT: {
      struct vtn_access_chain *new_chain =
         vtn_access_chain_extend(b, chain, 1);
      new_chain->link[chain->length].mode = vtn_access_mode_literal;
      unsigned elems = glsl_get_length(tail_type->type);
      if (load) {
         assert(*inout == NULL);
         *inout = rzalloc(b, struct vtn_ssa_value);
         (*inout)->type = tail_type->type;
         (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems);
      }
      for (unsigned i = 0; i < elems; i++) {
         new_chain->link[chain->length].id = i;
         struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
            tail_type->array_element : tail_type->members[i];
         _vtn_variable_load_store(b, load, new_chain, elem_type,
                                  &(*inout)->elems[i]);
      }
      return;
   }

   default:
      unreachable("Invalid access chain type");
   }
}
Example #7
static struct vtn_ssa_value *
mat_times_scalar(struct vtn_builder *b,
                 struct vtn_ssa_value *mat,
                 nir_ssa_def *scalar)
{
   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type);
   for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) {
      if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT)
         dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar);
      else
         dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar);
   }

   return dest;
}
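The loop is plain column-wise scalar multiplication, using fmul for float matrices and imul otherwise:

\[ (s M)_{*,i} = s \cdot M_{*,i}, \qquad i = 0, \dots, \text{columns} - 1 \]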
Example #8
static bool
split_var_copies_block(nir_block *block, void *void_state)
{
   struct split_var_copies_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
      if (intrinsic->intrinsic != nir_intrinsic_copy_var)
         continue;

      nir_deref *dest_head = &intrinsic->variables[0]->deref;
      nir_deref *src_head = &intrinsic->variables[1]->deref;
      nir_deref *dest_tail = get_deref_tail(dest_head);
      nir_deref *src_tail = get_deref_tail(src_head);

      switch (glsl_get_base_type(src_tail->type)) {
      case GLSL_TYPE_ARRAY:
      case GLSL_TYPE_STRUCT:
         split_var_copy_instr(intrinsic, dest_head, src_head,
                              dest_tail, src_tail, state);
         nir_instr_remove(&intrinsic->instr);
         ralloc_steal(state->dead_ctx, instr);
         break;
      case GLSL_TYPE_FLOAT:
      case GLSL_TYPE_INT:
      case GLSL_TYPE_UINT:
      case GLSL_TYPE_BOOL:
         if (glsl_type_is_matrix(src_tail->type)) {
            split_var_copy_instr(intrinsic, dest_head, src_head,
                                 dest_tail, src_tail, state);
            nir_instr_remove(&intrinsic->instr);
            ralloc_steal(state->dead_ctx, instr);
         }
         break;
      default:
         unreachable("Invalid type");
         break;
      }
   }

   return true;
}
Example #9
/* Tries to compute the size of an interface block based on the strides and
 * offsets that are provided to us in the SPIR-V source.
 */
static unsigned
vtn_type_block_size(struct vtn_type *type)
{
   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_DOUBLE: {
      unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) :
                                        glsl_get_matrix_columns(type->type);
      if (cols > 1) {
         assert(type->stride > 0);
         return type->stride * cols;
      } else if (base_type == GLSL_TYPE_DOUBLE) {
         return glsl_get_vector_elements(type->type) * 8;
      } else {
         return glsl_get_vector_elements(type->type) * 4;
      }
   }

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE: {
      unsigned size = 0;
      unsigned num_fields = glsl_get_length(type->type);
      for (unsigned f = 0; f < num_fields; f++) {
         unsigned field_end = type->offsets[f] +
                              vtn_type_block_size(type->members[f]);
         size = MAX2(size, field_end);
      }
      return size;
   }

   case GLSL_TYPE_ARRAY:
      assert(type->stride > 0);
      assert(glsl_get_length(type->type) > 0);
      return type->stride * glsl_get_length(type->type);

   default:
      assert(!"Invalid block type");
      return 0;
   }
}
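Worked example with hypothetical offsets and strides: a struct holding a vec3 at offset 0 (3 x 4 = 12 bytes) and a float at offset 12 (4 bytes), wrapped in a 4-element array with stride 16, gives

\[ \text{struct size} = \max(0 + 12,\ 12 + 4) = 16, \qquad \text{array size} = 4 \times 16 = 64 \]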
Example #10
static void
_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
                   struct vtn_access_chain *src, struct vtn_type *tail_type)
{
   enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      /* At this point, we have a scalar, vector, or matrix so we know that
       * there cannot be any structure splitting still in the way.  By
       * stopping at the matrix level rather than the vector level, we
       * ensure that matrices get loaded in the optimal way even if they
       * are stored row-major in a UBO.
       */
      vtn_variable_store(b, vtn_variable_load(b, src), dest);
      return;

   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT: {
      struct vtn_access_chain *new_src, *new_dest;
      new_src = vtn_access_chain_extend(b, src, 1);
      new_dest = vtn_access_chain_extend(b, dest, 1);
      new_src->link[src->length].mode = vtn_access_mode_literal;
      new_dest->link[dest->length].mode = vtn_access_mode_literal;
      unsigned elems = glsl_get_length(tail_type->type);
      for (unsigned i = 0; i < elems; i++) {
         new_src->link[src->length].id = i;
         new_dest->link[dest->length].id = i;
         struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
            tail_type->array_element : tail_type->members[i];
         _vtn_variable_copy(b, new_dest, new_src, elem_type);
      }
      return;
   }

   default:
      unreachable("Invalid access chain type");
   }
}
Example #11
static struct vtn_ssa_value *
matrix_multiply(struct vtn_builder *b,
                struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
{

   struct vtn_ssa_value *src0 = wrap_matrix(b, _src0);
   struct vtn_ssa_value *src1 = wrap_matrix(b, _src1);
   struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed);
   struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed);

   unsigned src0_rows = glsl_get_vector_elements(src0->type);
   unsigned src0_columns = glsl_get_matrix_columns(src0->type);
   unsigned src1_columns = glsl_get_matrix_columns(src1->type);

   const struct glsl_type *dest_type;
   if (src1_columns > 1) {
      dest_type = glsl_matrix_type(glsl_get_base_type(src0->type),
                                   src0_rows, src1_columns);
   } else {
      dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows);
   }
   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type);

   dest = wrap_matrix(b, dest);

   bool transpose_result = false;
   if (src0_transpose && src1_transpose) {
      /* transpose(A) * transpose(B) = transpose(B * A) */
      src1 = src0_transpose;
      src0 = src1_transpose;
      src0_transpose = NULL;
      src1_transpose = NULL;
      transpose_result = true;
   }

   if (src0_transpose && !src1_transpose &&
       glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
      /* We already have the rows of src0 and the columns of src1 available,
       * so we can just take the dot product of each row with each column to
       * get the result.
       */

      for (unsigned i = 0; i < src1_columns; i++) {
         nir_ssa_def *vec_src[4];
         for (unsigned j = 0; j < src0_rows; j++) {
            vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
                                          src1->elems[i]->def);
         }
         dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
      }
   } else {
      /* We don't handle the case where src1 is transposed but not src0, since
       * the general case only uses individual components of src1 so the
       * optimizer should chew through the transpose we emitted for src1.
       */

      for (unsigned i = 0; i < src1_columns; i++) {
         /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
         dest->elems[i]->def =
            nir_fmul(&b->nb, src0->elems[0]->def,
                     nir_channel(&b->nb, src1->elems[i]->def, 0));
         for (unsigned j = 1; j < src0_columns; j++) {
            dest->elems[i]->def =
               nir_fadd(&b->nb, dest->elems[i]->def,
                        nir_fmul(&b->nb, src0->elems[j]->def,
                                 nir_channel(&b->nb, src1->elems[i]->def, j)));
         }
      }
   }

   dest = unwrap_matrix(dest);

   if (transpose_result)
      dest = vtn_ssa_transpose(b, dest);

   return dest;
}
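In the column-vector convention used here, the general case builds each destination column as a linear combination of the columns of src0, and the early swap relies on the transpose identity cited in the comment:

\[ \text{dest}_{*,i} = \sum_{j} \text{src1}_{j,i}\, \text{src0}_{*,j}, \qquad A^{T} B^{T} = (B A)^{T} \]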
Example #12
static void
_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                      nir_ssa_def *index, nir_ssa_def *offset,
                      struct vtn_access_chain *chain, unsigned chain_idx,
                      struct vtn_type *type, struct vtn_ssa_value **inout)
{
   if (chain && chain_idx >= chain->length)
      chain = NULL;

   if (load && chain == NULL && *inout == NULL)
      *inout = vtn_create_ssa_value(b, type->type);

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      /* This is where things get interesting.  At this point, we've hit
       * a vector, a scalar, or a matrix.
       */
      if (glsl_type_is_matrix(type->type)) {
         if (chain == NULL) {
            /* Loading the whole matrix */
            struct vtn_ssa_value *transpose;
            unsigned num_ops, vec_width;
            if (type->row_major) {
               num_ops = glsl_get_vector_elements(type->type);
               vec_width = glsl_get_matrix_columns(type->type);
               if (load) {
                  const struct glsl_type *transpose_type =
                     glsl_matrix_type(base_type, vec_width, num_ops);
                  *inout = vtn_create_ssa_value(b, transpose_type);
               } else {
                  transpose = vtn_ssa_transpose(b, *inout);
                  inout = &transpose;
               }
            } else {
               num_ops = glsl_get_matrix_columns(type->type);
               vec_width = glsl_get_vector_elements(type->type);
            }

            for (unsigned i = 0; i < num_ops; i++) {
               nir_ssa_def *elem_offset =
                  nir_iadd(&b->nb, offset,
                           nir_imm_int(&b->nb, i * type->stride));
               _vtn_load_store_tail(b, op, load, index, elem_offset,
                                    &(*inout)->elems[i],
                                    glsl_vector_type(base_type, vec_width));
            }

            if (load && type->row_major)
               *inout = vtn_ssa_transpose(b, *inout);
         } else if (type->row_major) {
            /* Row-major but with an access chain. */
            nir_ssa_def *col_offset =
               vtn_access_link_as_ssa(b, chain->link[chain_idx],
                                      type->array_element->stride);
            offset = nir_iadd(&b->nb, offset, col_offset);

            if (chain_idx + 1 < chain->length) {
               /* Picking off a single element */
               nir_ssa_def *row_offset =
                  vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
                                         type->stride);
               offset = nir_iadd(&b->nb, offset, row_offset);
               if (load)
                  *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
               _vtn_load_store_tail(b, op, load, index, offset, inout,
                                    glsl_scalar_type(base_type));
            } else {
               /* Grabbing a column; picking one element off each row */
               unsigned num_comps = glsl_get_vector_elements(type->type);
               const struct glsl_type *column_type =
                  glsl_get_column_type(type->type);

               nir_ssa_def *comps[4];
               for (unsigned i = 0; i < num_comps; i++) {
                  nir_ssa_def *elem_offset =
                     nir_iadd(&b->nb, offset,
                              nir_imm_int(&b->nb, i * type->stride));

                  struct vtn_ssa_value *comp, temp_val;
                  if (!load) {
                     temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
                     temp_val.type = glsl_scalar_type(base_type);
                  }
                  comp = &temp_val;
                  _vtn_load_store_tail(b, op, load, index, elem_offset,
                                       &comp, glsl_scalar_type(base_type));
                  comps[i] = comp->def;
               }

               if (load) {
                  if (*inout == NULL)
                     *inout = vtn_create_ssa_value(b, column_type);

                  (*inout)->def = nir_vec(&b->nb, comps, num_comps);
               }
            }
         } else {
            /* Column-major with a deref. Fall through to array case. */
            nir_ssa_def *col_offset =
               vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
            offset = nir_iadd(&b->nb, offset, col_offset);

            _vtn_block_load_store(b, op, load, index, offset,
                                  chain, chain_idx + 1,
                                  type->array_element, inout);
         }
      } else if (chain == NULL) {
         /* Single whole vector */
         assert(glsl_type_is_vector_or_scalar(type->type));
         _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
      } else {
         /* Single component of a vector. Fall through to array case. */
         nir_ssa_def *elem_offset =
            vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
         offset = nir_iadd(&b->nb, offset, elem_offset);

         _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
                               type->array_element, inout);
      }
      return;

   case GLSL_TYPE_ARRAY: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
                               type->array_element, &(*inout)->elems[i]);
      }
      return;
   }

   case GLSL_TYPE_STRUCT: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
                               type->members[i], &(*inout)->elems[i]);
      }
      return;
   }

   default:
      unreachable("Invalid block member type");
   }
}
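For the row-major single-element path above, reading the two access-chain links as column c and row r, the byte offset works out to (strides named as in the code):

\[ \text{offset} = \text{base} + c \cdot \texttt{type->array\_element->stride} + r \cdot \texttt{type->stride} \]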
Example #13
static void
validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      unsigned components_read =
         nir_intrinsic_infos[instr->intrinsic].src_components[i];
      if (components_read == 0)
         components_read = instr->num_components;

      validate_assert(state, components_read > 0);

      if (instr->src[i].is_ssa) {
         validate_assert(state, components_read <= instr->src[i].ssa->num_components);
      } else if (!instr->src[i].reg.reg->is_packed) {
         validate_assert(state, components_read <= instr->src[i].reg.reg->num_components);
      }

      validate_src(&instr->src[i], state);
   }

   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
   for (unsigned i = 0; i < num_vars; i++) {
      validate_deref_var(instr, instr->variables[i], state);
   }

   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
      unsigned components_written =
         nir_intrinsic_infos[instr->intrinsic].dest_components;
      if (components_written == 0)
         components_written = instr->num_components;

      validate_assert(state, components_written > 0);

      if (instr->dest.is_ssa) {
         validate_assert(state, components_written <= instr->dest.ssa.num_components);
      } else if (!instr->dest.reg.reg->is_packed) {
         validate_assert(state, components_written <= instr->dest.reg.reg->num_components);
      }

      validate_dest(&instr->dest, state);
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_var: {
      const struct glsl_type *type =
         nir_deref_tail(&instr->variables[0]->deref)->type;
      validate_assert(state, glsl_type_is_vector_or_scalar(type) ||
             (instr->variables[0]->var->data.mode == nir_var_uniform &&
              glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
      validate_assert(state, instr->num_components == glsl_get_vector_elements(type));
      break;
   }
   case nir_intrinsic_store_var: {
      const struct glsl_type *type =
         nir_deref_tail(&instr->variables[0]->deref)->type;
      validate_assert(state, glsl_type_is_vector_or_scalar(type) ||
             (instr->variables[0]->var->data.mode == nir_var_uniform &&
              glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
      validate_assert(state, instr->num_components == glsl_get_vector_elements(type));
      validate_assert(state, instr->variables[0]->var->data.mode != nir_var_shader_in &&
             instr->variables[0]->var->data.mode != nir_var_uniform &&
             instr->variables[0]->var->data.mode != nir_var_shader_storage);
      validate_assert(state, (nir_intrinsic_write_mask(instr) & ~((1 << instr->num_components) - 1)) == 0);
      break;
   }
   case nir_intrinsic_copy_var:
      validate_assert(state, nir_deref_tail(&instr->variables[0]->deref)->type ==
             nir_deref_tail(&instr->variables[1]->deref)->type);
      validate_assert(state, instr->variables[0]->var->data.mode != nir_var_shader_in &&
             instr->variables[0]->var->data.mode != nir_var_uniform &&
             instr->variables[0]->var->data.mode != nir_var_shader_storage);
      break;
   default:
      break;
   }
}
Example #14
void
vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
                    const uint32_t *w, unsigned count)
{
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);

   val->ssa = vtn_create_ssa_value(b, val->type->type);

   switch (opcode) {
   case SpvOpGroupNonUniformElect: {
      vtn_fail_if(val->type->type != glsl_bool_type(),
                  "OpGroupNonUniformElect must return a Bool");
      nir_intrinsic_instr *elect =
         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_elect);
      nir_ssa_dest_init_for_type(&elect->instr, &elect->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &elect->instr);
      val->ssa->def = &elect->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBallot: {
      vtn_fail_if(val->type->type != glsl_vector_type(GLSL_TYPE_UINT, 4),
                  "OpGroupNonUniformBallot must return a uvec4");
      nir_intrinsic_instr *ballot =
         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_ballot);
      ballot->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
      nir_ssa_dest_init(&ballot->instr, &ballot->dest, 4, 32, NULL);
      ballot->num_components = 4;
      nir_builder_instr_insert(&b->nb, &ballot->instr);
      val->ssa->def = &ballot->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformInverseBallot: {
      /* This one is just a BallotBitfieldExtract with subgroup invocation.
       * We could add a NIR intrinsic but it's easier to just lower it on the
       * spot.
       */
      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader,
                                    nir_intrinsic_ballot_bitfield_extract);

      intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
      intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));

      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBallotBitExtract:
   case SpvOpGroupNonUniformBallotBitCount:
   case SpvOpGroupNonUniformBallotFindLSB:
   case SpvOpGroupNonUniformBallotFindMSB: {
      nir_ssa_def *src0, *src1 = NULL;
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformBallotBitExtract:
         op = nir_intrinsic_ballot_bitfield_extract;
         src0 = vtn_ssa_value(b, w[4])->def;
         src1 = vtn_ssa_value(b, w[5])->def;
         break;
      case SpvOpGroupNonUniformBallotBitCount:
         switch ((SpvGroupOperation)w[4]) {
         case SpvGroupOperationReduce:
            op = nir_intrinsic_ballot_bit_count_reduce;
            break;
         case SpvGroupOperationInclusiveScan:
            op = nir_intrinsic_ballot_bit_count_inclusive;
            break;
         case SpvGroupOperationExclusiveScan:
            op = nir_intrinsic_ballot_bit_count_exclusive;
            break;
         default:
            unreachable("Invalid group operation");
         }
         src0 = vtn_ssa_value(b, w[5])->def;
         break;
      case SpvOpGroupNonUniformBallotFindLSB:
         op = nir_intrinsic_ballot_find_lsb;
         src0 = vtn_ssa_value(b, w[4])->def;
         break;
      case SpvOpGroupNonUniformBallotFindMSB:
         op = nir_intrinsic_ballot_find_msb;
         src0 = vtn_ssa_value(b, w[4])->def;
         break;
      default:
         unreachable("Unhandled opcode");
      }

      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader, op);

      intrin->src[0] = nir_src_for_ssa(src0);
      if (src1)
         intrin->src[1] = nir_src_for_ssa(src1);

      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformBroadcastFirst:
      vtn_build_subgroup_instr(b, nir_intrinsic_read_first_invocation,
                               val->ssa, vtn_ssa_value(b, w[4]), NULL, 0, 0);
      break;

   case SpvOpGroupNonUniformBroadcast:
      vtn_build_subgroup_instr(b, nir_intrinsic_read_invocation,
                               val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;

   case SpvOpGroupNonUniformAll:
   case SpvOpGroupNonUniformAny:
   case SpvOpGroupNonUniformAllEqual: {
      vtn_fail_if(val->type->type != glsl_bool_type(),
                  "OpGroupNonUniform(All|Any|AllEqual) must return a bool");
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformAll:
         op = nir_intrinsic_vote_all;
         break;
      case SpvOpGroupNonUniformAny:
         op = nir_intrinsic_vote_any;
         break;
      case SpvOpGroupNonUniformAllEqual: {
         switch (glsl_get_base_type(val->type->type)) {
         case GLSL_TYPE_FLOAT:
         case GLSL_TYPE_DOUBLE:
            op = nir_intrinsic_vote_feq;
            break;
         case GLSL_TYPE_UINT:
         case GLSL_TYPE_INT:
         case GLSL_TYPE_UINT64:
         case GLSL_TYPE_INT64:
         case GLSL_TYPE_BOOL:
            op = nir_intrinsic_vote_ieq;
            break;
         default:
            unreachable("Unhandled type");
         }
         break;
      }
      default:
         unreachable("Unhandled opcode");
      }

      nir_ssa_def *src0 = vtn_ssa_value(b, w[4])->def;

      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader, op);
      intrin->num_components = src0->num_components;
      intrin->src[0] = nir_src_for_ssa(src0);
      nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest,
                                 val->type->type, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);

      val->ssa->def = &intrin->dest.ssa;
      break;
   }

   case SpvOpGroupNonUniformShuffle:
   case SpvOpGroupNonUniformShuffleXor:
   case SpvOpGroupNonUniformShuffleUp:
   case SpvOpGroupNonUniformShuffleDown: {
      nir_intrinsic_op op;
      switch (opcode) {
      case SpvOpGroupNonUniformShuffle:
         op = nir_intrinsic_shuffle;
         break;
      case SpvOpGroupNonUniformShuffleXor:
         op = nir_intrinsic_shuffle_xor;
         break;
      case SpvOpGroupNonUniformShuffleUp:
         op = nir_intrinsic_shuffle_up;
         break;
      case SpvOpGroupNonUniformShuffleDown:
         op = nir_intrinsic_shuffle_down;
         break;
      default:
         unreachable("Invalid opcode");
      }
      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;
   }

   case SpvOpGroupNonUniformQuadBroadcast:
      vtn_build_subgroup_instr(b, nir_intrinsic_quad_broadcast,
                               val->ssa, vtn_ssa_value(b, w[4]),
                               vtn_ssa_value(b, w[5])->def, 0, 0);
      break;

   case SpvOpGroupNonUniformQuadSwap: {
      unsigned direction = vtn_constant_uint(b, w[5]);
      nir_intrinsic_op op;
      switch (direction) {
      case 0:
         op = nir_intrinsic_quad_swap_horizontal;
         break;
      case 1:
         op = nir_intrinsic_quad_swap_vertical;
         break;
      case 2:
         op = nir_intrinsic_quad_swap_diagonal;
         break;
      default:
         vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap");
      }
      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
                               NULL, 0, 0);
      break;
   }

   case SpvOpGroupNonUniformIAdd:
   case SpvOpGroupNonUniformFAdd:
   case SpvOpGroupNonUniformIMul:
   case SpvOpGroupNonUniformFMul:
   case SpvOpGroupNonUniformSMin:
   case SpvOpGroupNonUniformUMin:
   case SpvOpGroupNonUniformFMin:
   case SpvOpGroupNonUniformSMax:
   case SpvOpGroupNonUniformUMax:
   case SpvOpGroupNonUniformFMax:
   case SpvOpGroupNonUniformBitwiseAnd:
   case SpvOpGroupNonUniformBitwiseOr:
   case SpvOpGroupNonUniformBitwiseXor:
   case SpvOpGroupNonUniformLogicalAnd:
   case SpvOpGroupNonUniformLogicalOr:
   case SpvOpGroupNonUniformLogicalXor: {
      nir_op reduction_op;
      switch (opcode) {
      case SpvOpGroupNonUniformIAdd:
         reduction_op = nir_op_iadd;
         break;
      case SpvOpGroupNonUniformFAdd:
         reduction_op = nir_op_fadd;
         break;
      case SpvOpGroupNonUniformIMul:
         reduction_op = nir_op_imul;
         break;
      case SpvOpGroupNonUniformFMul:
         reduction_op = nir_op_fmul;
         break;
      case SpvOpGroupNonUniformSMin:
         reduction_op = nir_op_imin;
         break;
      case SpvOpGroupNonUniformUMin:
         reduction_op = nir_op_umin;
         break;
      case SpvOpGroupNonUniformFMin:
         reduction_op = nir_op_fmin;
         break;
      case SpvOpGroupNonUniformSMax:
         reduction_op = nir_op_imax;
         break;
      case SpvOpGroupNonUniformUMax:
         reduction_op = nir_op_umax;
         break;
      case SpvOpGroupNonUniformFMax:
         reduction_op = nir_op_fmax;
         break;
      case SpvOpGroupNonUniformBitwiseAnd:
      case SpvOpGroupNonUniformLogicalAnd:
         reduction_op = nir_op_iand;
         break;
      case SpvOpGroupNonUniformBitwiseOr:
      case SpvOpGroupNonUniformLogicalOr:
         reduction_op = nir_op_ior;
         break;
      case SpvOpGroupNonUniformBitwiseXor:
      case SpvOpGroupNonUniformLogicalXor:
         reduction_op = nir_op_ixor;
         break;
      default:
         unreachable("Invalid reduction operation");
      }

      nir_intrinsic_op op;
      unsigned cluster_size = 0;
      switch ((SpvGroupOperation)w[4]) {
      case SpvGroupOperationReduce:
         op = nir_intrinsic_reduce;
         break;
      case SpvGroupOperationInclusiveScan:
         op = nir_intrinsic_inclusive_scan;
         break;
      case SpvGroupOperationExclusiveScan:
         op = nir_intrinsic_exclusive_scan;
         break;
      case SpvGroupOperationClusteredReduce:
         op = nir_intrinsic_reduce;
         assert(count == 7);
         cluster_size = vtn_constant_uint(b, w[6]);
         break;
      default:
         unreachable("Invalid group operation");
      }

      vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[5]),
                               NULL, reduction_op, cluster_size);
      break;
   }

   default:
      unreachable("Invalid SPIR-V opcode");
   }
}
Example #15
/* Recursively constructs deref chains to split a copy instruction into
 * multiple (if needed) copy instructions with full-length deref chains.
 * External callers of this function should pass in the heads and tails of
 * the deref chains found in the source and destination of the copy
 * instruction.
 *
 * \param  old_copy  The copy instruction we are splitting
 * \param  dest_head The head of the destination deref chain we are building
 * \param  src_head  The head of the source deref chain we are building
 * \param  dest_tail The tail of the destination deref chain we are building
 * \param  src_tail  The tail of the source deref chain we are building
 * \param  state     The current split_var_copies_state object
 */
static void
split_var_copy_instr(nir_intrinsic_instr *old_copy,
                     nir_deref *dest_head, nir_deref *src_head,
                     nir_deref *dest_tail, nir_deref *src_tail,
                     struct split_var_copies_state *state)
{
   assert(src_tail->type == dest_tail->type);

   /* Make sure these really are the tails of the deref chains */
   assert(dest_tail->child == NULL);
   assert(src_tail->child == NULL);

   switch (glsl_get_base_type(src_tail->type)) {
   case GLSL_TYPE_ARRAY: {
      /* Make a wildcard dereference */
      nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
      deref->deref.type = glsl_get_array_element(src_tail->type);
      deref->deref_array_type = nir_deref_array_type_wildcard;

      /* Set the tail of both as the newly created wildcard deref.  It is
       * safe to use the same wildcard in both places because a) we will be
       * copying it before we put it in an actual instruction and b)
       * everything that will potentially add another link in the deref
       * chain will also add the same thing to both chains.
       */
      src_tail->child = &deref->deref;
      dest_tail->child = &deref->deref;

      split_var_copy_instr(old_copy, dest_head, src_head,
                           dest_tail->child, src_tail->child, state);

      /* Set it back to the way we found it */
      src_tail->child = NULL;
      dest_tail->child = NULL;
      break;
   }

   case GLSL_TYPE_STRUCT:
      /* This is the only part that actually does any interesting
       * splitting.  For array types, we just use wildcards and resolve
       * them later.  For structure types, we need to emit one copy
       * instruction for every structure element.  Because we may have
       * structs inside structs, we just recurse and let the next level
       * take care of any additional structures.
       */
      for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) {
         nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i);
         deref->deref.type = glsl_get_struct_field(src_tail->type, i);

         /* Set the tail of both as the newly created structure deref.  It
          * is safe to use the same struct deref in both places because a) we
          * will be copying it before we put it in an actual instruction
          * and b) everything that will potentially add another link in the
          * deref chain will also add the same thing to both chains.
          */
         src_tail->child = &deref->deref;
         dest_tail->child = &deref->deref;

         split_var_copy_instr(old_copy, dest_head, src_head,
                              dest_tail->child, src_tail->child, state);
      }
      /* Set it back to the way we found it */
      src_tail->child = NULL;
      dest_tail->child = NULL;
      break;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (glsl_type_is_matrix(src_tail->type)) {
         nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
         deref->deref.type = glsl_get_column_type(src_tail->type);
         deref->deref_array_type = nir_deref_array_type_wildcard;

         /* Set the tail of both as the newly created wildcard deref.  It
          * is safe to use the same wildcard in both places because a) we
          * will be copying it before we put it in an actual instruction
          * and b) everything that will potentially add another link in the
          * deref chain will also add the same thing to both chains.
          */
         src_tail->child = &deref->deref;
         dest_tail->child = &deref->deref;

         split_var_copy_instr(old_copy, dest_head, src_head,
                              dest_tail->child, src_tail->child, state);

         /* Set it back to the way we found it */
         src_tail->child = NULL;
         dest_tail->child = NULL;
      } else {
         /* At this point, we have fully built our deref chains and can
          * actually add the new copy instruction.
          */
         nir_intrinsic_instr *new_copy =
            nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var);

         /* We need to make copies because a) this deref chain actually
          * belongs to the copy instruction and b) the deref chains may
          * have some of the same links due to the way we constructed them
          */
         nir_deref *src = nir_copy_deref(new_copy, src_head);
         nir_deref *dest = nir_copy_deref(new_copy, dest_head);

         new_copy->variables[0] = nir_deref_as_var(dest);
         new_copy->variables[1] = nir_deref_as_var(src);

         /* Emit the copy instruction after the old instruction.  We'll
          * remove the old one later.
          */
         nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
         state->progress = true;
      }
      break;

   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
   case GLSL_TYPE_ATOMIC_UINT:
   case GLSL_TYPE_INTERFACE:
   default:
      unreachable("Cannot copy these types");
   }
}
Example #16
nir_deref_var *
vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain)
{
   nir_deref_var *deref_var;
   if (chain->var->var) {
      deref_var = nir_deref_var_create(b, chain->var->var);
   } else {
      assert(chain->var->members);
      /* Create the deref_var manually.  It will get filled out later. */
      deref_var = rzalloc(b, nir_deref_var);
      deref_var->deref.deref_type = nir_deref_type_var;
   }

   struct vtn_type *deref_type = chain->var->type;
   nir_deref *tail = &deref_var->deref;
   nir_variable **members = chain->var->members;

   for (unsigned i = 0; i < chain->length; i++) {
      enum glsl_base_type base_type = glsl_get_base_type(deref_type->type);
      switch (base_type) {
      case GLSL_TYPE_UINT:
      case GLSL_TYPE_INT:
      case GLSL_TYPE_FLOAT:
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_BOOL:
      case GLSL_TYPE_ARRAY: {
         deref_type = deref_type->array_element;

         nir_deref_array *deref_arr = nir_deref_array_create(b);
         deref_arr->deref.type = deref_type->type;

         if (chain->link[i].mode == vtn_access_mode_literal) {
            deref_arr->deref_array_type = nir_deref_array_type_direct;
            deref_arr->base_offset = chain->link[i].id;
         } else {
            assert(chain->link[i].mode == vtn_access_mode_id);
            deref_arr->deref_array_type = nir_deref_array_type_indirect;
            deref_arr->base_offset = 0;
            deref_arr->indirect =
               nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def);
         }
         tail->child = &deref_arr->deref;
         tail = tail->child;
         break;
      }

      case GLSL_TYPE_STRUCT: {
         assert(chain->link[i].mode == vtn_access_mode_literal);
         unsigned idx = chain->link[i].id;
         deref_type = deref_type->members[idx];
         if (members) {
            /* This is a pre-split structure. */
            deref_var->var = members[idx];
            rewrite_deref_types(&deref_var->deref, members[idx]->type);
            assert(tail->type == deref_type->type);
            members = NULL;
         } else {
            nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx);
            deref_struct->deref.type = deref_type->type;
            tail->child = &deref_struct->deref;
            tail = tail->child;
         }
         break;
      }
      default:
         unreachable("Invalid type for deref");
      }
   }

   assert(members == NULL);
   return deref_var;
}
Example #17
static void
_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
                      nir_deref *tail, struct vtn_ssa_value *inout)
{
   /* The deref tail may contain a deref to select a component of a vector (in
    * other words, it might not be an actual tail) so we have to save it away
    * here since we overwrite it later.
    */
   nir_deref *old_child = tail->child;

   if (glsl_type_is_vector_or_scalar(tail->type)) {
      /* Terminate the deref chain in case there is one more link to pick
       * off a component of the vector.
       */
      tail->child = NULL;

      nir_intrinsic_op op = load ? nir_intrinsic_load_var :
                                   nir_intrinsic_store_var;

      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
      intrin->variables[0] =
         nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
      intrin->num_components = glsl_get_vector_elements(tail->type);

      if (load) {
         nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                           intrin->num_components,
                           glsl_get_bit_size(glsl_get_base_type(tail->type)),
                           NULL);
         inout->def = &intrin->dest.ssa;
      } else {
         nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1);
         intrin->src[0] = nir_src_for_ssa(inout->def);
      }

      nir_builder_instr_insert(&b->nb, &intrin->instr);
   } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
              glsl_type_is_matrix(tail->type)) {
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_array *deref_arr = nir_deref_array_create(b);
      deref_arr->deref_array_type = nir_deref_array_type_direct;
      deref_arr->deref.type = glsl_get_array_element(tail->type);
      tail->child = &deref_arr->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_arr->base_offset = i;
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   } else {
      assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
      unsigned elems = glsl_get_length(tail->type);
      nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
      tail->child = &deref_struct->deref;
      for (unsigned i = 0; i < elems; i++) {
         deref_struct->index = i;
         deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
      }
   }

   tail->child = old_child;
}