Beispiel #1
0
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   fs_builder abld = bld.annotate("compute pixel centers");
   if (devinfo->gen >= 8 || dispatch_width == 8) {
      /* The "Register Region Restrictions" page says for BDW (and newer,
       * presumably):
       *
       *     "When destination spans two registers, the source may be one or
       *      two registers. The destination elements must be evenly split
       *      between the two registers."
       *
       * Thus we can do a single add(16) in SIMD8 or an add(32) in SIMD16 to
       * compute our pixel centers.
       */
      fs_reg int_pixel_xy(VGRF, alloc.allocate(dispatch_width / 8),
                          BRW_REGISTER_TYPE_UW);

      const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0);
      dbld.ADD(int_pixel_xy,
               fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
               fs_reg(brw_imm_v(0x11001010)));

      this->pixel_x = vgrf(glsl_type::float_type);
      this->pixel_y = vgrf(glsl_type::float_type);
      abld.emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy);
      abld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy);
   } else {
      /* The "Register Region Restrictions" page says for SNB, IVB, HSW:
       *
       *     "When destination spans two registers, the source MUST span two
       *      registers."
       *
       * Since the GRF source of the ADD will only read a single register, we
       * must do two separate ADDs in SIMD16.
       */
      fs_reg int_pixel_x = vgrf(glsl_type::uint_type);
      fs_reg int_pixel_y = vgrf(glsl_type::uint_type);
      int_pixel_x.type = BRW_REGISTER_TYPE_UW;
      int_pixel_y.type = BRW_REGISTER_TYPE_UW;
      abld.ADD(int_pixel_x,
               fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
               fs_reg(brw_imm_v(0x10101010)));
      abld.ADD(int_pixel_y,
               fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
               fs_reg(brw_imm_v(0x11001100)));

      /* As of gen6, we can no longer mix float and int sources.  We have
       * to turn the integer pixel centers into floats for their actual
       * use.
       */
      this->pixel_x = vgrf(glsl_type::float_type);
      this->pixel_y = vgrf(glsl_type::float_type);
      abld.MOV(this->pixel_x, int_pixel_x);
      abld.MOV(this->pixel_y, int_pixel_y);
   }

   abld = bld.annotate("compute pos.w");
   this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0));
   this->wpos_w = vgrf(glsl_type::float_type);
   abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);

   for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
      uint8_t reg = payload.barycentric_coord_reg[i];
      this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0));
   }
}
Beispiel #2
0
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   fs_builder abld = bld.annotate("compute pixel centers");
   if (devinfo->gen >= 8 || dispatch_width == 8) {
      /* The "Register Region Restrictions" page says for BDW (and newer,
       * presumably):
       *
       *     "When destination spans two registers, the source may be one or
       *      two registers. The destination elements must be evenly split
       *      between the two registers."
       *
       * Thus we can do a single add(16) in SIMD8 or an add(32) in SIMD16 to
       * compute our pixel centers.
       */
      fs_reg int_pixel_xy(VGRF, alloc.allocate(dispatch_width / 8),
                          BRW_REGISTER_TYPE_UW);

      const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0);
      dbld.ADD(int_pixel_xy,
               fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
               fs_reg(brw_imm_v(0x11001010)));

      this->pixel_x = vgrf(glsl_type::float_type);
      this->pixel_y = vgrf(glsl_type::float_type);
      abld.emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy);
      abld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy);
   } else {
      /* The "Register Region Restrictions" page says for SNB, IVB, HSW:
       *
       *     "When destination spans two registers, the source MUST span two
       *      registers."
       *
       * Since the GRF source of the ADD will only read a single register, we
       * must do two separate ADDs in SIMD16.
       */
      fs_reg int_pixel_x = vgrf(glsl_type::uint_type);
      fs_reg int_pixel_y = vgrf(glsl_type::uint_type);
      int_pixel_x.type = BRW_REGISTER_TYPE_UW;
      int_pixel_y.type = BRW_REGISTER_TYPE_UW;
      abld.ADD(int_pixel_x,
               fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
               fs_reg(brw_imm_v(0x10101010)));
      abld.ADD(int_pixel_y,
               fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
               fs_reg(brw_imm_v(0x11001100)));

      /* As of gen6, we can no longer mix float and int sources.  We have
       * to turn the integer pixel centers into floats for their actual
       * use.
       */
      this->pixel_x = vgrf(glsl_type::float_type);
      this->pixel_y = vgrf(glsl_type::float_type);
      abld.MOV(this->pixel_x, int_pixel_x);
      abld.MOV(this->pixel_y, int_pixel_y);
   }

   abld = bld.annotate("compute pos.w");
   this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0));
   this->wpos_w = vgrf(glsl_type::float_type);
   abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);

   struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
   uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes &
      (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID |
       1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID);

   for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
      uint8_t reg = payload.barycentric_coord_reg[i];
      this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0));

      if (devinfo->needs_unlit_centroid_workaround &&
          (centroid_modes & (1 << i))) {
         /* Get the pixel/sample mask into f0 so that we know which
          * pixels are lit.  Then, for each channel that is unlit,
          * replace the centroid data with non-centroid data.
          */
         bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);

         uint8_t pixel_reg = payload.barycentric_coord_reg[i - 1];

         set_predicate_inv(BRW_PREDICATE_NORMAL, true,
                           bld.half(0).MOV(brw_vec8_grf(reg, 0),
                                           brw_vec8_grf(pixel_reg, 0)));
         set_predicate_inv(BRW_PREDICATE_NORMAL, true,
                           bld.half(0).MOV(brw_vec8_grf(reg + 1, 0),
                                           brw_vec8_grf(pixel_reg + 1, 0)));
         if (dispatch_width == 16) {
            set_predicate_inv(BRW_PREDICATE_NORMAL, true,
                              bld.half(1).MOV(brw_vec8_grf(reg + 2, 0),
                                              brw_vec8_grf(pixel_reg + 2, 0)));
            set_predicate_inv(BRW_PREDICATE_NORMAL, true,
                              bld.half(1).MOV(brw_vec8_grf(reg + 3, 0),
                                              brw_vec8_grf(pixel_reg + 3, 0)));
         }
         assert(dispatch_width != 32); /* not implemented yet */
      }
   }
}