Example #1
static nir_ssa_def *
build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
{
   unsigned swiz[4] = {1, 0, 0, 0};
   nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
   return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
}
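For reference, a plain scalar sketch of the same computation (hypothetical helper, not part of any pass): the swizzle pairs col0.x with col1.y and col0.y with col1.x, so the final subtraction is the usual 2x2 determinant.

static float
mat2_det_scalar(const float col0[2], const float col1[2])
{
   /* det of [a c; b d] stored as columns (a, b) and (c, d): a*d - b*c */
   return col0[0] * col1[1] - col0[1] * col1[0];
}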
Example #2
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         nir_ssa_def *txs = get_texture_size(b, tex);
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, offset_coord, 1),
                                    nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
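A minimal driver loop for this lowering might look like the sketch below (hedged: lower_offset_impl is a hypothetical wrapper; the in-tree nir_lower_tex pass combines this with many other lowerings behind option flags).

static bool
lower_offset_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_tex)
            progress |= lower_offset(&b, nir_instr_as_tex(instr));
      }
   }

   return progress;
}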
Example #3
/* Computes the determinant of the submatrix given by taking src and
 * removing the specified row and column.
 */
static nir_ssa_def *
build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
                 unsigned size, unsigned row, unsigned col)
{
   assert(row < size && col < size);
   if (size == 2) {
      return nir_channel(b, src->elems[1 - col]->def, 1 - row);
   } else {
      /* Swizzle to get all but the specified row */
      unsigned swiz[3];
      for (unsigned j = 0; j < 3; j++)
         swiz[j] = j + (j >= row);

      /* Grab all but the specified column */
      nir_ssa_def *subcol[3];
      for (unsigned j = 0; j < size; j++) {
         if (j != col) {
            subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def,
                                                swiz, size - 1, true);
         }
      }

      if (size == 3) {
         return build_mat2_det(b, subcol);
      } else {
         assert(size == 4);
         return build_mat3_det(b, subcol);
      }
   }
}
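These minors combine into a full determinant by cofactor expansion; here is a hedged sketch (build_det_by_cofactors is hypothetical, and the real code uses the dedicated build_matN_det helpers instead):

static nir_ssa_def *
build_det_by_cofactors(struct nir_builder *b, struct vtn_ssa_value *src,
                       unsigned size)
{
   /* det(M) = sum over i of (-1)^i * M[i][0] * subdet(M, i, 0) */
   nir_ssa_def *det = NULL;
   for (unsigned i = 0; i < size; i++) {
      nir_ssa_def *term =
         nir_fmul(b, nir_channel(b, src->elems[0]->def, i),
                  build_mat_subdet(b, src, size, i, 0));
      if (det == NULL)
         det = term;
      else if (i & 1)
         det = nir_fsub(b, det, term);
      else
         det = nir_fadd(b, det, term);
   }
   return det;
}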
Example #4
static void
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   b->cursor = nir_before_instr(&tex->instr);

   /* Walk through the sources saturating the requested arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_coord)
         continue;

      nir_ssa_def *src =
         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);

      /* split src into components: */
      nir_ssa_def *comp[4];

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_ssa_def *txs = get_texture_size(b, tex);
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(src));
   }
}
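Hypothetical usage: clamp just the s and t coordinates (the function already skips the array index internally):

   /* Bits 0 and 1 of sat_mask request clamping of coordinate
    * components 0 (s) and 1 (t).
    */
   saturate_src(b, tex, (1 << 0) | (1 << 1));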
Example #5
/* Multiply interp_var_at_offset's offset by transform.x to flip it. */
static void
lower_interp_var_at_offset(lower_wpos_ytransform_state *state,
                           nir_intrinsic_instr *interp)
{
    nir_builder *b = &state->b;
    nir_ssa_def *offset;
    nir_ssa_def *flip_y;

    b->cursor = nir_before_instr(&interp->instr);

    offset = nir_ssa_for_src(b, interp->src[0], 2);
    flip_y = nir_fmul(b, nir_channel(b, offset, 1),
                      nir_channel(b, get_transform(state), 0));
    nir_instr_rewrite_src(&interp->instr, &interp->src[0],
                          nir_src_for_ssa(nir_vec2(b, nir_channel(b, offset, 0),
                                          flip_y)));
}
Example #6
static void
lower_load_sample_pos(lower_wpos_ytransform_state *state,
                      nir_intrinsic_instr *intr)
{
    nir_builder *b = &state->b;
    b->cursor = nir_after_instr(&intr->instr);

    nir_ssa_def *pos = &intr->dest.ssa;
    nir_ssa_def *scale = nir_channel(b, get_transform(state), 0);
    nir_ssa_def *neg_scale = nir_channel(b, get_transform(state), 2);
    /* Either y or 1-y for scale equal to 1 or -1 respectively. */
    nir_ssa_def *flipped_y =
        nir_fadd(b, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)),
                 nir_fmul(b, nir_channel(b, pos, 1), scale));
    nir_ssa_def *flipped_pos = nir_vec2(b, nir_channel(b, pos, 0), flipped_y);

    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(flipped_pos),
                                   flipped_pos->parent_instr);
}
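A quick sanity check of the flip identity (hedged, assuming the transform packs the scale in .x and its negation in .z): with (scale, neg_scale) = (1.0, -1.0), flipped_y = max(-1, 0) + y * 1 = y; with (scale, neg_scale) = (-1.0, 1.0), flipped_y = max(1, 0) + y * -1 = 1 - y, matching the comment above.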
Example #7
static nir_ssa_def *
build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
{
   unsigned yzx[4] = {1, 2, 0, 0};
   unsigned zxy[4] = {2, 0, 1, 0};

   nir_ssa_def *prod0 =
      nir_fmul(b, col[0],
               nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
                           nir_swizzle(b, col[2], zxy, 3, true)));
   nir_ssa_def *prod1 =
      nir_fmul(b, col[0],
               nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
                           nir_swizzle(b, col[2], yzx, 3, true)));

   nir_ssa_def *diff = nir_fsub(b, prod0, prod1);

   return nir_fadd(b, nir_channel(b, diff, 0),
                      nir_fadd(b, nir_channel(b, diff, 1),
                                  nir_channel(b, diff, 2)));
}
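Expanding the yzx/zxy swizzles shows that the sum of diff's three channels is the familiar scalar form (hedged restatement, with a = col[0], b = col[1], c = col[2]):

   det3 = a0*(b1*c2 - b2*c1) + a1*(b2*c0 - b0*c2) + a2*(b0*c1 - b1*c0)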
Example #8
static nir_ssa_def *
get_texture_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_tex_instr *tql;

   unsigned num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_coord ||
          tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset ||
          tex->src[i].src_type == nir_tex_src_texture_handle ||
          tex->src[i].src_type == nir_tex_src_sampler_handle)
         num_srcs++;
   }

   tql = nir_tex_instr_create(b->shader, num_srcs);
   tql->op = nir_texop_lod;
   tql->coord_components = tex->coord_components;
   tql->sampler_dim = tex->sampler_dim;
   tql->is_array = tex->is_array;
   tql->is_shadow = tex->is_shadow;
   tql->is_new_style_shadow = tex->is_new_style_shadow;
   tql->texture_index = tex->texture_index;
   tql->sampler_index = tex->sampler_index;
   tql->dest_type = nir_type_float;

   unsigned idx = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_coord ||
          tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset ||
          tex->src[i].src_type == nir_tex_src_texture_handle ||
          tex->src[i].src_type == nir_tex_src_sampler_handle) {
         nir_src_copy(&tql->src[idx].src, &tex->src[i].src, tql);
         tql->src[idx].src_type = tex->src[i].src_type;
         idx++;
      }
   }

   nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL);
   nir_builder_instr_insert(b, &tql->instr);

   /* The LOD is the y component of the result */
   return nir_channel(b, &tql->dest.ssa, 1);
}
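A hypothetical usage sketch, clamping the queried LOD to be non-negative before reusing it in an explicit-LOD fetch:

   nir_ssa_def *lod = get_texture_lod(b, tex);
   lod = nir_fmax(b, lod, nir_imm_float(b, 0.0f));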
Example #9
static nir_ssa_def *
build_mat4_det(nir_builder *b, nir_ssa_def **col)
{
   nir_ssa_def *subdet[4];
   for (unsigned i = 0; i < 4; i++) {
      unsigned swiz[3];
      for (unsigned j = 0; j < 3; j++)
         swiz[j] = j + (j >= i);

      nir_ssa_def *subcol[3];
      subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
      subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
      subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);

      subdet[i] = build_mat3_det(b, subcol);
   }

   nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));

   return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
                                  nir_channel(b, prod, 1)),
                      nir_fsub(b, nir_channel(b, prod, 2),
                                  nir_channel(b, prod, 3)));
}
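This is again cofactor expansion along the first column (hedged restatement): subdet[i] is the determinant of the 3x3 minor formed by dropping row i from columns 1..3, and the alternating-sign sum (p0 - p1) + (p2 - p3) computes sum over i of (-1)^i * col[0][i] * subdet[i].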
Example #10
static void
lower_load_pointcoord(lower_wpos_ytransform_state *state,
                      nir_intrinsic_instr *intr)
{
   nir_builder *b = &state->b;
   b->cursor = nir_after_instr(&intr->instr);

   nir_ssa_def *pntc = &intr->dest.ssa;
   nir_ssa_def *transform = get_transform(state);
   nir_ssa_def *y = nir_channel(b, pntc, 1);
   /* The offset is 1 if we're flipping, 0 otherwise. */
   nir_ssa_def *offset = nir_fmax(b, nir_channel(b, transform, 2),
                                  nir_imm_float(b, 0.0));
   /* Flip the sign of y if we're flipping. */
   nir_ssa_def *scaled = nir_fmul(b, y, nir_channel(b, transform, 0));

   /* Reassemble the vector. */
   nir_ssa_def *flipped_pntc = nir_vec2(b,
                                        nir_channel(b, pntc, 0),
                                        nir_fadd(b, offset, scaled));

   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(flipped_pntc),
                                  flipped_pntc->parent_instr);
}
Example #11
/* turns 'fddy(p)' into 'fddy(fmul(p, transform.x))' */
static void
lower_fddy(lower_wpos_ytransform_state *state, nir_alu_instr *fddy)
{
    nir_builder *b = &state->b;
    nir_ssa_def *p, *pt, *trans;

    b->cursor = nir_before_instr(&fddy->instr);

    p = nir_ssa_for_alu_src(b, fddy, 0);
    trans = get_transform(state);
    pt = nir_fmul(b, p, nir_channel(b, trans, 0));

    nir_instr_rewrite_src(&fddy->instr,
                          &fddy->src[0].src,
                          nir_src_for_ssa(pt));

    for (unsigned i = 0; i < 4; i++)
        fddy->src[0].swizzle[i] = MIN2(i, pt->num_components - 1);
}
Example #12
static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0's or 1's, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
      } else {
         nir_ssa_def *srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
            } else {
               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
            }
         }
         swizzled = nir_vec(b, srcs, 4);
      }
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
}
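Hypothetical usage, assuming the common encoding where values 0-3 pick a result channel and 4/5 pick constant zero/one (which is what get_zero_or_one suggests): emulating an alpha-only view of an RGBA result.

   static const uint8_t alpha_swizzle[4] = { 4, 4, 4, 3 }; /* (0, 0, 0, a) */
   swizzle_result(b, tex, alpha_swizzle);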
Example #13
static void
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
}
Example #14
static void *vc4_get_yuv_vs(struct pipe_context *pctx)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;

   if (vc4->yuv_linear_blit_vs)
           return vc4->yuv_linear_blit_vs;

   const struct nir_shader_compiler_options *options =
           pscreen->get_compiler_options(pscreen,
                                         PIPE_SHADER_IR_NIR,
                                         PIPE_SHADER_VERTEX);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
   b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs");

   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");

   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                               vec4, "gl_Position");
   pos_out->data.location = VARYING_SLOT_POS;

   nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);

   struct pipe_shader_state shader_tmpl = {
           .type = PIPE_SHADER_IR_NIR,
           .ir.nir = b.shader,
   };

   vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);

   return vc4->yuv_linear_blit_vs;
}

static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;
   struct pipe_shader_state **cached_shader;
   const char *name;

   if (cpp == 1) {
           cached_shader = &vc4->yuv_linear_blit_fs_8bit;
           name = "linear_blit_8bit_fs";
   } else {
           cached_shader = &vc4->yuv_linear_blit_fs_16bit;
           name = "linear_blit_16bit_fs";
   }

   if (*cached_shader)
           return *cached_shader;

   const struct nir_shader_compiler_options *options =
           pscreen->get_compiler_options(pscreen,
                                         PIPE_SHADER_IR_NIR,
                                         PIPE_SHADER_FRAGMENT);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
   b.shader->info.name = ralloc_strdup(b.shader, name);

   const struct glsl_type *vec4 = glsl_vec4_type();
   const struct glsl_type *glsl_int = glsl_int_type();

   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                 vec4, "f_color");
   color_out->data.location = FRAG_RESULT_COLOR;

   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");
   pos_in->data.location = VARYING_SLOT_POS;
   nir_ssa_def *pos = nir_load_var(&b, pos_in);

   nir_ssa_def *one = nir_imm_int(&b, 1);
   nir_ssa_def *two = nir_imm_int(&b, 2);

   nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
   nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

   nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform,
                                                 glsl_int, "stride");
   nir_ssa_def *stride = nir_load_var(&b, stride_in);

   nir_ssa_def *x_offset;
   nir_ssa_def *y_offset;
   if (cpp == 1) {
           nir_ssa_def *intra_utile_x_offset =
                   nir_ishl(&b, nir_iand(&b, x, one), two);
           nir_ssa_def *inter_utile_x_offset =
                   nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one);

           x_offset = nir_iadd(&b,
                               intra_utile_x_offset,
                               inter_utile_x_offset);
           y_offset = nir_imul(&b,
                               nir_iadd(&b,
                                        nir_ishl(&b, y, one),
                                        nir_ushr(&b, nir_iand(&b, x, two), one)),
                               stride);
   } else {
           x_offset = nir_ishl(&b, x, two);
           y_offset = nir_imul(&b, y, stride);
   }

   nir_intrinsic_instr *load =
           nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   load->src[0] = nir_src_for_ssa(one);
   load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset));
   nir_builder_instr_insert(&b, &load->instr);

   nir_store_var(&b, color_out,
                 nir_unpack_unorm_4x8(&b, &load->dest.ssa),
                 0xf);

   struct pipe_shader_state shader_tmpl = {
           .type = PIPE_SHADER_IR_NIR,
           .ir.nir = b.shader,
   };

   *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);

   return *cached_shader;
}
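A scalar restatement of the cpp == 1 address math above (hedged), walking the 4x4-byte utiles of the raster-order source plane:

        x_offset = ((x & 1) << 2) + ((x & ~3) << 1);
        y_offset = ((y << 1) + ((x & 2) >> 1)) * stride;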

static bool
vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_resource *src = vc4_resource(info->src.resource);
        struct vc4_resource *dst = vc4_resource(info->dst.resource);
        bool ok;

        if (src->tiled)
                return false;
        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
            src->base.format != PIPE_FORMAT_R8G8_UNORM)
                return false;

        /* YUV blits always turn raster-order to tiled */
        assert(dst->base.format == src->base.format);
        assert(dst->tiled);

        /* Always 1:1 and at the origin */
        assert(info->src.box.x == 0 && info->dst.box.x == 0);
        assert(info->src.box.y == 0 && info->dst.box.y == 0);
        assert(info->src.box.width == info->dst.box.width);
        assert(info->src.box.height == info->dst.box.height);

        if ((src->slices[info->src.level].offset & 3) ||
            (src->slices[info->src.level].stride & 3)) {
                perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n",
                           src->slices[info->src.level].offset,
                           src->slices[info->src.level].stride);
                goto fallback;
        }

        vc4_blitter_save(vc4);

        /* Create a renderable surface mapping the T-tiled shadow buffer.
         */
        struct pipe_surface dst_tmpl;
        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
                                         info->dst.level, info->dst.box.z);
        dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM;
        struct pipe_surface *dst_surf =
                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
        if (!dst_surf) {
                fprintf(stderr, "Failed to create YUV dst surface\n");
                util_blitter_unset_running_flag(vc4->blitter);
                return false;
        }
        dst_surf->width /= 2;
        if (dst->cpp == 1)
                dst_surf->height /= 2;

        /* Set the constant buffer. */
        uint32_t stride = src->slices[info->src.level].stride;
        struct pipe_constant_buffer cb_uniforms = {
                .user_buffer = &stride,
                .buffer_size = sizeof(stride),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb_uniforms);
        struct pipe_constant_buffer cb_src = {
                .buffer = info->src.resource,
                .buffer_offset = src->slices[info->src.level].offset,
                .buffer_size = (src->bo->size -
                                src->slices[info->src.level].offset),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_src);

        /* Unbind the textures, to make sure we don't try to recurse into the
         * shadow blit.
         */
        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);

        util_blitter_custom_shader(vc4->blitter, dst_surf,
                                   vc4_get_yuv_vs(pctx),
                                   vc4_get_yuv_fs(pctx, src->cpp));

        util_blitter_restore_textures(vc4->blitter);
        util_blitter_restore_constant_buffer_state(vc4->blitter);
        /* Restore cb1 (util_blitter doesn't handle this one). */
        struct pipe_constant_buffer cb_disabled = { 0 };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_disabled);

        pipe_surface_reference(&dst_surf, NULL);

        return true;

fallback:
        /* Do an immediate SW fallback, since the render blit path
         * would just recurse.
         */
        ok = util_try_blit_via_copy_region(pctx, info);
        assert(ok); (void)ok;

        return true;
}

static bool
vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(ctx);

        if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
                fprintf(stderr, "blit unsupported %s -> %s\n",
                    util_format_short_name(info->src.resource->format),
                    util_format_short_name(info->dst.resource->format));
                return false;
        }

        /* Enable the scissor, so we get a minimal set of tiles rendered. */
        if (!info->scissor_enable) {
                info->scissor_enable = true;
                info->scissor.minx = info->dst.box.x;
                info->scissor.miny = info->dst.box.y;
                info->scissor.maxx = info->dst.box.x + info->dst.box.width;
                info->scissor.maxy = info->dst.box.y + info->dst.box.height;
        }

        vc4_blitter_save(vc4);
        util_blitter_blit(vc4->blitter, info);

        return true;
}

/* Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
void
vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
        struct pipe_blit_info info = *blit_info;

        if (vc4_yuv_blit(pctx, blit_info))
                return;

        if (vc4_tile_blit(pctx, blit_info))
                return;

        if (info.mask & PIPE_MASK_S) {
                if (util_try_blit_via_copy_region(pctx, &info))
                        return;

                info.mask &= ~PIPE_MASK_S;
                fprintf(stderr, "cannot blit stencil, skipping\n");
        }

        if (vc4_render_blit(pctx, &info))
                return;

        fprintf(stderr, "Unsupported blit\n");
}
Example #15
static void
_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                      nir_ssa_def *index, nir_ssa_def *offset,
                      struct vtn_access_chain *chain, unsigned chain_idx,
                      struct vtn_type *type, struct vtn_ssa_value **inout)
{
   if (chain && chain_idx >= chain->length)
      chain = NULL;

   if (load && chain == NULL && *inout == NULL)
      *inout = vtn_create_ssa_value(b, type->type);

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      /* This is where things get interesting.  At this point, we've hit
       * a vector, a scalar, or a matrix.
       */
      if (glsl_type_is_matrix(type->type)) {
         if (chain == NULL) {
            /* Loading the whole matrix */
            struct vtn_ssa_value *transpose;
            unsigned num_ops, vec_width;
            if (type->row_major) {
               num_ops = glsl_get_vector_elements(type->type);
               vec_width = glsl_get_matrix_columns(type->type);
               if (load) {
                  const struct glsl_type *transpose_type =
                     glsl_matrix_type(base_type, vec_width, num_ops);
                  *inout = vtn_create_ssa_value(b, transpose_type);
               } else {
                  transpose = vtn_ssa_transpose(b, *inout);
                  inout = &transpose;
               }
            } else {
               num_ops = glsl_get_matrix_columns(type->type);
               vec_width = glsl_get_vector_elements(type->type);
            }

            for (unsigned i = 0; i < num_ops; i++) {
               nir_ssa_def *elem_offset =
                  nir_iadd(&b->nb, offset,
                           nir_imm_int(&b->nb, i * type->stride));
               _vtn_load_store_tail(b, op, load, index, elem_offset,
                                    &(*inout)->elems[i],
                                    glsl_vector_type(base_type, vec_width));
            }

            if (load && type->row_major)
               *inout = vtn_ssa_transpose(b, *inout);
         } else if (type->row_major) {
            /* Row-major but with an access chain. */
            nir_ssa_def *col_offset =
               vtn_access_link_as_ssa(b, chain->link[chain_idx],
                                      type->array_element->stride);
            offset = nir_iadd(&b->nb, offset, col_offset);

            if (chain_idx + 1 < chain->length) {
               /* Picking off a single element */
               nir_ssa_def *row_offset =
                  vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
                                         type->stride);
               offset = nir_iadd(&b->nb, offset, row_offset);
               if (load)
                  *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
               _vtn_load_store_tail(b, op, load, index, offset, inout,
                                    glsl_scalar_type(base_type));
            } else {
               /* Grabbing a column; picking one element off each row */
               unsigned num_comps = glsl_get_vector_elements(type->type);
               const struct glsl_type *column_type =
                  glsl_get_column_type(type->type);

               nir_ssa_def *comps[4];
               for (unsigned i = 0; i < num_comps; i++) {
                  nir_ssa_def *elem_offset =
                     nir_iadd(&b->nb, offset,
                              nir_imm_int(&b->nb, i * type->stride));

                  struct vtn_ssa_value *comp, temp_val;
                  if (!load) {
                     temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
                     temp_val.type = glsl_scalar_type(base_type);
                  }
                  comp = &temp_val;
                  _vtn_load_store_tail(b, op, load, index, elem_offset,
                                       &comp, glsl_scalar_type(base_type));
                  comps[i] = comp->def;
               }

               if (load) {
                  if (*inout == NULL)
                     *inout = vtn_create_ssa_value(b, column_type);

                  (*inout)->def = nir_vec(&b->nb, comps, num_comps);
               }
            }
         } else {
            /* Column-major with a deref. Fall through to array case. */
            nir_ssa_def *col_offset =
               vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
            offset = nir_iadd(&b->nb, offset, col_offset);

            _vtn_block_load_store(b, op, load, index, offset,
                                  chain, chain_idx + 1,
                                  type->array_element, inout);
         }
      } else if (chain == NULL) {
         /* Single whole vector */
         assert(glsl_type_is_vector_or_scalar(type->type));
         _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
      } else {
         /* Single component of a vector. Fall through to array case. */
         nir_ssa_def *elem_offset =
            vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
         offset = nir_iadd(&b->nb, offset, elem_offset);

         _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
                               type->array_element, inout);
      }
      return;

   case GLSL_TYPE_ARRAY: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
                               type->array_element, &(*inout)->elems[i]);
      }
      return;
   }

   case GLSL_TYPE_STRUCT: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
                               type->members[i], &(*inout)->elems[i]);
      }
      return;
   }

   default:
      unreachable("Invalid block member type");
   }
}
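A sketch of how this recursive worker is typically entered for a load (hedged; block_load_sketch is hypothetical and the in-tree wrappers carry a few more parameters):

static struct vtn_ssa_value *
block_load_sketch(struct vtn_builder *b, nir_intrinsic_op op,
                  nir_ssa_def *index, nir_ssa_def *offset,
                  struct vtn_access_chain *chain, struct vtn_type *type)
{
   struct vtn_ssa_value *value = NULL;
   _vtn_block_load_store(b, op, true /* load */, index, offset,
                         chain, 0, type, &value);
   return value;
}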
Example #16
/* see emit_wpos_adjustment() in st_mesa_to_tgsi.c */
static void
emit_wpos_adjustment(lower_wpos_ytransform_state *state,
                     nir_intrinsic_instr *intr,
                     bool invert, float adjX, float adjY[2])
{
    nir_builder *b = &state->b;
    nir_variable *fragcoord = intr->variables[0]->var;
    nir_ssa_def *wpostrans, *wpos_temp, *wpos_temp_y, *wpos_input;

    assert(intr->dest.is_ssa);

    b->cursor = nir_before_instr(&intr->instr);

    wpostrans = get_transform(state);
    wpos_input = nir_load_var(b, fragcoord);

    /* First, apply the coordinate shift: */
    if (adjX || adjY[0] || adjY[1]) {
        if (adjY[0] != adjY[1]) {
            /* Adjust the y coordinate by adjY[1] or adjY[0], depending on
             * whether inversion will actually be applied.  That is
             * determined by testing against the inversion state variable
             * used below, which will be either +1 or -1.
             */
            nir_ssa_def *adj_temp;

            adj_temp = nir_cmp(b,
                               nir_channel(b, wpostrans, invert ? 2 : 0),
                               nir_imm_vec4(b, adjX, adjY[0], 0.0f, 0.0f),
                               nir_imm_vec4(b, adjX, adjY[1], 0.0f, 0.0f));

            wpos_temp = nir_fadd(b, wpos_input, adj_temp);
        } else {
            wpos_temp = nir_fadd(b,
                                 wpos_input,
                                 nir_imm_vec4(b, adjX, adjY[0], 0.0f, 0.0f));
        }
        wpos_input = wpos_temp;
    } else {
        /* MOV wpos_temp, input[wpos]
         */
        wpos_temp = wpos_input;
    }

    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
     * inversion/identity, or the other way around if we're drawing to an FBO.
     */
    if (invert) {
        /* wpos_temp.y = wpos_input * wpostrans.xxxx + wpostrans.yyyy */
        wpos_temp_y = nir_fadd(b, nir_fmul(b, nir_channel(b, wpos_temp, 1),
                                           nir_channel(b, wpostrans, 0)),
                               nir_channel(b, wpostrans, 1));
    } else {
        /* wpos_temp.y = wpos_input * wpostrans.zzzz + wpostrans.wwww */
        wpos_temp_y = nir_fadd(b, nir_fmul(b, nir_channel(b, wpos_temp, 1),
                                           nir_channel(b, wpostrans, 2)),
                               nir_channel(b, wpostrans, 3));
    }

    wpos_temp = nir_vec4(b,
                         nir_channel(b, wpos_temp, 0),
                         wpos_temp_y,
                         nir_channel(b, wpos_temp, 2),
                         nir_channel(b, wpos_temp, 3));

    nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(wpos_temp));
}
Example #17
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a)
{
   nir_const_value m[3] = {
      { .f32 = { 1.0f,  0.0f,         1.59602678f, 0.0f } },
      { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
      { .f32 = { 1.0f,  2.01723214f,  0.0f,        0.0f } }
   };

   nir_ssa_def *yuv =
      nir_vec4(b,
               nir_fmul(b, nir_imm_float(b, 1.16438356f),
                        nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),
               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_imm_float(b, 0.0));

   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));
   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));

   nir_ssa_def *result = nir_vec4(b, red, green, blue, a);

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
}
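The constants are the standard BT.601 limited-range coefficients (hedged restatement):

   R = 1.16438 * (Y - 16/255) + 1.59603 * (V - 128/255)
   G = 1.16438 * (Y - 16/255) - 0.39176 * (U - 128/255) - 0.81297 * (V - 128/255)
   B = 1.16438 * (Y - 16/255) + 2.01723 * (U - 128/255)

Each nir_fdot4 against a row of m evaluates one of these lines.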

static void
lower_y_uv_external(nir_builder *b, nir_tex_instr *tex)
{
Example #18
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev)
{
    nir_builder b;
    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
                                           false,
                                           false,
                                           GLSL_TYPE_FLOAT);
    const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
                                       false,
                                       false,
                                       GLSL_TYPE_FLOAT);
    nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
    b.shader->info->name = ralloc_strdup(b.shader, "meta_itob_cs");
    b.shader->info->cs.local_size[0] = 16;
    b.shader->info->cs.local_size[1] = 16;
    b.shader->info->cs.local_size[2] = 1;
    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
                              sampler_type, "s_tex");
    input_img->data.descriptor_set = 0;
    input_img->data.binding = 0;

    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
                               img_type, "out_img");
    output_img->data.descriptor_set = 0;
    output_img->data.binding = 1;

    nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
    nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
    nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                            b.shader->info->cs.local_size[0],
                                            b.shader->info->cs.local_size[1],
                                            b.shader->info->cs.local_size[2], 0);

    nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

    nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
    offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
    offset->num_components = 2;
    nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
    nir_builder_instr_insert(&b, &offset->instr);

    nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
    stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
    stride->num_components = 1;
    nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
    nir_builder_instr_insert(&b, &stride->instr);

    nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);

    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
    tex->op = nir_texop_txf;
    tex->src[0].src_type = nir_tex_src_coord;
    tex->src[0].src = nir_src_for_ssa(img_coord);
    tex->src[1].src_type = nir_tex_src_lod;
    tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
    tex->dest_type = nir_type_float;
    tex->is_array = false;
    tex->coord_components = 2;
    tex->texture = nir_deref_var_create(tex, input_img);
    tex->sampler = NULL;

    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
    nir_builder_instr_insert(&b, &tex->instr);

    nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
    nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

    nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
    tmp = nir_iadd(&b, tmp, pos_x);

    nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

    nir_ssa_def *outval = &tex->dest.ssa;
    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
    store->src[0] = nir_src_for_ssa(coord);
    store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
    store->src[2] = nir_src_for_ssa(outval);
    store->variables[0] = nir_deref_var_create(store, output_img);

    nir_builder_instr_insert(&b, &store->instr);
    return b.shader;
}
Example #19
static struct vtn_ssa_value *
matrix_multiply(struct vtn_builder *b,
                struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
{
   struct vtn_ssa_value *src0 = wrap_matrix(b, _src0);
   struct vtn_ssa_value *src1 = wrap_matrix(b, _src1);
   struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed);
   struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed);

   unsigned src0_rows = glsl_get_vector_elements(src0->type);
   unsigned src0_columns = glsl_get_matrix_columns(src0->type);
   unsigned src1_columns = glsl_get_matrix_columns(src1->type);

   const struct glsl_type *dest_type;
   if (src1_columns > 1) {
      dest_type = glsl_matrix_type(glsl_get_base_type(src0->type),
                                   src0_rows, src1_columns);
   } else {
      dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows);
   }
   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type);

   dest = wrap_matrix(b, dest);

   bool transpose_result = false;
   if (src0_transpose && src1_transpose) {
      /* transpose(A) * transpose(B) = transpose(B * A) */
      src1 = src0_transpose;
      src0 = src1_transpose;
      src0_transpose = NULL;
      src1_transpose = NULL;
      transpose_result = true;
   }

   if (src0_transpose && !src1_transpose &&
       glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
      /* We already have the rows of src0 and the columns of src1 available,
       * so we can just take the dot product of each row with each column to
       * get the result.
       */

      for (unsigned i = 0; i < src1_columns; i++) {
         nir_ssa_def *vec_src[4];
         for (unsigned j = 0; j < src0_rows; j++) {
            vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
                                          src1->elems[i]->def);
         }
         dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
      }
   } else {
      /* We don't handle the case where src1 is transposed but not src0, since
       * the general case only uses individual components of src1 so the
       * optimizer should chew through the transpose we emitted for src1.
       */

      for (unsigned i = 0; i < src1_columns; i++) {
         /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
         dest->elems[i]->def =
            nir_fmul(&b->nb, src0->elems[0]->def,
                     nir_channel(&b->nb, src1->elems[i]->def, 0));
         for (unsigned j = 1; j < src0_columns; j++) {
            dest->elems[i]->def =
               nir_fadd(&b->nb, dest->elems[i]->def,
                        nir_fmul(&b->nb, src0->elems[j]->def,
                                 nir_channel(&b->nb, src1->elems[i]->def, j)));
         }
      }
   }

   dest = unwrap_matrix(dest);

   if (transpose_result)
      dest = vtn_ssa_transpose(b, dest);

   return dest;
}
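Both paths compute the same product (hedged restatement): entry (row j, column i) of the result is dot(row j of src0, column i of src1). The fast path reads rows directly out of src0_transpose and packs each column of dot products with nir_vec; the general path instead accumulates dest[i] = sum over j of src0[j] * src1[i][j], scaling whole columns of src0 by scalar components of src1.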
Example #20
void
vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
               const uint32_t *w, unsigned count)
{
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   const struct glsl_type *type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   vtn_foreach_decoration(b, val, handle_no_contraction, NULL);

   /* Collect the various SSA sources */
   const unsigned num_inputs = count - 3;
   struct vtn_ssa_value *vtn_src[4] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++)
      vtn_src[i] = vtn_ssa_value(b, w[i + 3]);

   if (glsl_type_is_matrix(vtn_src[0]->type) ||
       (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) {
      vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]);
      b->nb.exact = false;
      return;
   }

   val->ssa = vtn_create_ssa_value(b, type);
   nir_ssa_def *src[4] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++) {
      assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type));
      src[i] = vtn_src[i]->def;
   }

   switch (opcode) {
   case SpvOpAny:
      if (src[0]->num_components == 1) {
         val->ssa->def = nir_imov(&b->nb, src[0]);
      } else {
         nir_op op;
         switch (src[0]->num_components) {
         case 2:  op = nir_op_bany_inequal2; break;
         case 3:  op = nir_op_bany_inequal3; break;
         case 4:  op = nir_op_bany_inequal4; break;
         default: unreachable("invalid number of components");
         }
         val->ssa->def = nir_build_alu(&b->nb, op, src[0],
                                       nir_imm_int(&b->nb, NIR_FALSE),
                                       NULL, NULL);
      }
      break;

   case SpvOpAll:
      if (src[0]->num_components == 1) {
         val->ssa->def = nir_imov(&b->nb, src[0]);
      } else {
         nir_op op;
         switch (src[0]->num_components) {
         case 2:  op = nir_op_ball_iequal2;  break;
         case 3:  op = nir_op_ball_iequal3;  break;
         case 4:  op = nir_op_ball_iequal4;  break;
         default: unreachable("invalid number of components");
         }
         val->ssa->def = nir_build_alu(&b->nb, op, src[0],
                                       nir_imm_int(&b->nb, NIR_TRUE),
                                       NULL, NULL);
      }
      break;

   case SpvOpOuterProduct: {
      for (unsigned i = 0; i < src[1]->num_components; i++) {
         val->ssa->elems[i]->def =
            nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i));
      }
      break;
   }

   case SpvOpDot:
      val->ssa->def = nir_fdot(&b->nb, src[0], src[1]);
      break;

   case SpvOpIAddCarry:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]);
      break;

   case SpvOpISubBorrow:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]);
      break;

   case SpvOpUMulExtended:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]);
      break;

   case SpvOpSMulExtended:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]);
      break;

   case SpvOpFwidth:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy(&b->nb, src[0])));
      break;
   case SpvOpFwidthFine:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0])));
      break;
   case SpvOpFwidthCoarse:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0])));
      break;

   case SpvOpVectorTimesScalar:
      /* The builder will take care of splatting for us. */
      val->ssa->def = nir_fmul(&b->nb, src[0], src[1]);
      break;

   case SpvOpIsNan:
      val->ssa->def = nir_fne(&b->nb, src[0], src[0]);
      break;

   case SpvOpIsInf:
      val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]),
                                      nir_imm_float(&b->nb, INFINITY));
      break;

   case SpvOpFUnordEqual:
   case SpvOpFUnordNotEqual:
   case SpvOpFUnordLessThan:
   case SpvOpFUnordGreaterThan:
   case SpvOpFUnordLessThanEqual:
   case SpvOpFUnordGreaterThanEqual: {
      bool swap;
      nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
      nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);
      nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap, src_alu_type, dst_alu_type);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      val->ssa->def =
         nir_ior(&b->nb,
                 nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
                 nir_ior(&b->nb,
                         nir_fne(&b->nb, src[0], src[0]),
                         nir_fne(&b->nb, src[1], src[1])));
      break;
   }

   case SpvOpFOrdEqual:
   case SpvOpFOrdNotEqual:
   case SpvOpFOrdLessThan:
   case SpvOpFOrdGreaterThan:
   case SpvOpFOrdLessThanEqual:
   case SpvOpFOrdGreaterThanEqual: {
      bool swap;
      nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
      nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);
      nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap, src_alu_type, dst_alu_type);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      val->ssa->def =
         nir_iand(&b->nb,
                  nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
                  nir_iand(&b->nb,
                          nir_feq(&b->nb, src[0], src[0]),
                          nir_feq(&b->nb, src[1], src[1])));
      break;
   }

   default: {
      bool swap;
      nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
      nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);
      nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap, src_alu_type, dst_alu_type);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
      break;
   } /* default */
   }

   b->nb.exact = false;
}
Example #21
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v)
{
   nir_const_value m[3] = {
      { .f32 = { 1.0f,  0.0f,         1.59602678f, 0.0f } },
      { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
      { .f32 = { 1.0f,  2.01723214f,  0.0f,        0.0f } }
   };

   nir_ssa_def *yuv =
      nir_vec4(b,
               nir_fmul(b, nir_imm_float(b, 1.16438356f),
                        nir_fadd(b, y, nir_imm_float(b, -0.0625f))),
               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -0.5f)), 0),
               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -0.5f)), 0),
               nir_imm_float(b, 0.0));

   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));
   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));

   nir_ssa_def *result = nir_vec4(b, red, green, blue, nir_imm_float(b, 1.0f));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
}

static void
lower_y_uv_external(nir_builder *b, nir_tex_instr *tex)
{
Example #22
static void
vc4_nir_lower_txf_ms_instr(struct vc4_compile *c, nir_builder *b,
                           nir_tex_instr *txf_ms)
{
    if (txf_ms->op != nir_texop_txf_ms)
        return;

    b->cursor = nir_before_instr(&txf_ms->instr);

    nir_tex_instr *txf = nir_tex_instr_create(c->s, 1);
    txf->op = nir_texop_txf;
    txf->sampler = txf_ms->sampler;
    txf->sampler_index = txf_ms->sampler_index;
    txf->coord_components = txf_ms->coord_components;
    txf->is_shadow = txf_ms->is_shadow;
    txf->is_new_style_shadow = txf_ms->is_new_style_shadow;

    nir_ssa_def *coord = NULL, *sample_index = NULL;
    for (int i = 0; i < txf_ms->num_srcs; i++) {
        assert(txf_ms->src[i].src.is_ssa);

        switch (txf_ms->src[i].src_type) {
        case nir_tex_src_coord:
            coord = txf_ms->src[i].src.ssa;
            break;
        case nir_tex_src_ms_index:
            sample_index = txf_ms->src[i].src.ssa;
            break;
        default:
            unreachable("Unknown txf_ms src\n");
        }
    }
    assert(coord);
    assert(sample_index);

    nir_ssa_def *x = nir_channel(b, coord, 0);
    nir_ssa_def *y = nir_channel(b, coord, 1);

    uint32_t tile_w = 32;
    uint32_t tile_h = 32;
    uint32_t tile_w_shift = 5;
    uint32_t tile_h_shift = 5;
    uint32_t tile_size = (tile_h * tile_w *
                          VC4_MAX_SAMPLES * sizeof(uint32_t));
    unsigned unit = txf_ms->sampler_index;
    uint32_t w = align(c->key->tex[unit].msaa_width, tile_w);
    uint32_t w_tiles = w / tile_w;

    nir_ssa_def *x_tile = nir_ushr(b, x, nir_imm_int(b, tile_w_shift));
    nir_ssa_def *y_tile = nir_ushr(b, y, nir_imm_int(b, tile_h_shift));
    nir_ssa_def *tile_addr = nir_iadd(b,
                                      nir_imul(b, x_tile,
                                              nir_imm_int(b, tile_size)),
                                      nir_imul(b, y_tile,
                                              nir_imm_int(b, (w_tiles *
                                                      tile_size))));
    nir_ssa_def *x_subspan = nir_iand(b, x,
                                      nir_imm_int(b, (tile_w - 1) & ~1));
    nir_ssa_def *y_subspan = nir_iand(b, y,
                                      nir_imm_int(b, (tile_h - 1) & ~1));
    nir_ssa_def *subspan_addr = nir_iadd(b,
                                         nir_imul(b, x_subspan,
                                                 nir_imm_int(b, 2 * VC4_MAX_SAMPLES * sizeof(uint32_t))),
                                         nir_imul(b, y_subspan,
                                                 nir_imm_int(b,
                                                         tile_w *
                                                         VC4_MAX_SAMPLES *
                                                         sizeof(uint32_t))));

    nir_ssa_def *pixel_addr = nir_ior(b,
                                      nir_iand(b,
                                              nir_ishl(b, x,
                                                      nir_imm_int(b, 2)),
                                              nir_imm_int(b, (1 << 2))),
                                      nir_iand(b,
                                              nir_ishl(b, y,
                                                      nir_imm_int(b, 3)),
                                              nir_imm_int(b, (1 << 3))));

    nir_ssa_def *sample_addr = nir_ishl(b, sample_index, nir_imm_int(b, 4));

    nir_ssa_def *addr = nir_iadd(b,
                                 nir_ior(b, sample_addr, pixel_addr),
                                 nir_iadd(b, subspan_addr, tile_addr));

    txf->src[0].src_type = nir_tex_src_coord;
    txf->src[0].src = nir_src_for_ssa(nir_vec2(b, addr, nir_imm_int(b, 0)));
    nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL);
    nir_builder_instr_insert(b, &txf->instr);
    nir_ssa_def_rewrite_uses(&txf_ms->dest.ssa,
                             nir_src_for_ssa(&txf->dest.ssa));
    nir_instr_remove(&txf_ms->instr);
}
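Worked numbers for the tiling constants (hedged, assuming VC4_MAX_SAMPLES == 4): tile_size = 32 * 32 * 4 * sizeof(uint32_t) = 16384 bytes per 32x32 tile; each 2x2-pixel subspan then spans 2 * 2 * 4 * 4 = 64 bytes, stored as four 16-byte per-sample groups (hence sample_addr = sample_index << 4), with pixel_addr selecting the 4-byte pixel within a group from the low bits of x and y.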