/**
       * Emit an untyped surface atomic opcode.  \p dims determines the number
       * of components of the address and \p rsize the number of components of
       * the returned value (either zero or one).
       */
      fs_reg
      emit_untyped_atomic(const fs_builder &bld,
                          const fs_reg &surface, const fs_reg &addr,
                          const fs_reg &src0, const fs_reg &src1,
                          unsigned dims, unsigned rsize, unsigned op,
                          brw_predicate pred)
      {
         /* FINISHME: Factor out this frequently recurring pattern into a
          * helper function.
          */
         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
         const fs_reg srcs[] = { src0, src1 };
         const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
         bld.LOAD_PAYLOAD(tmp, srcs, n, 0);

         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
                          addr, tmp, surface, dims, op, rsize, pred);
      }
Exemple #2
0
void
fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
{
   int slot, urb_offset, length;
   int starting_urb_offset = 0;
   const struct brw_vue_prog_data *vue_prog_data =
      (const struct brw_vue_prog_data *) this->prog_data;
   const struct brw_vs_prog_key *vs_key =
      (const struct brw_vs_prog_key *) this->key;
   const GLbitfield64 psiz_mask =
      VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ;
   const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   bool flush;
   fs_reg sources[8];

   /* If we don't have any valid slots to write, just do a minimal urb write
    * send to terminate the shader.  This includes 1 slot of undefined data,
    * because it's invalid to write 0 data:
    *
    * From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions -
    * Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read >
    * Write Data Payload:
    *
    *    "The write data payload can be between 1 and 8 message phases long."
    */
   if (vue_map->slots_valid == 0) {
      fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
      bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
                                                BRW_REGISTER_TYPE_UD)));

      fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
      inst->eot = true;
      inst->mlen = 2;
      inst->offset = 1;
      return;
   }

   opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8;
   int header_size = 1;
   fs_reg per_slot_offsets;

   if (stage == MESA_SHADER_GEOMETRY) {
      const struct brw_gs_prog_data *gs_prog_data =
         (const struct brw_gs_prog_data *) this->prog_data;

      /* We need to increment the Global Offset to skip over the control data
       * header and the extra "Vertex Count" field (1 HWord) at the beginning
       * of the VUE.  We're counting in OWords, so the units are doubled.
       */
      starting_urb_offset = 2 * gs_prog_data->control_data_header_size_hwords;
      if (gs_prog_data->static_vertex_count == -1)
         starting_urb_offset += 2;

      /* We also need to use per-slot offsets.  The per-slot offset is the
       * Vertex Count.  SIMD8 mode processes 8 different primitives at a
       * time; each may output a different number of vertices.
       */
      opcode = SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT;
      header_size++;

      /* The URB offset is in 128-bit units, so we need to multiply by 2 */
      const int output_vertex_size_owords =
         gs_prog_data->output_vertex_size_hwords * 2;

      fs_reg offset;
      if (gs_vertex_count.file == IMM) {
         per_slot_offsets = fs_reg(output_vertex_size_owords *
                                   gs_vertex_count.ud);
      } else {
         per_slot_offsets = vgrf(glsl_type::int_type);
         bld.MUL(per_slot_offsets, gs_vertex_count,
                 fs_reg(output_vertex_size_owords));
      }
   }

   length = 0;
   urb_offset = starting_urb_offset;
   flush = false;
   for (slot = 0; slot < vue_map->num_slots; slot++) {
      int varying = vue_map->slot_to_varying[slot];
      switch (varying) {
      case VARYING_SLOT_PSIZ: {
         /* The point size varying slot is the vue header and is always in the
          * vue map.  But often none of the special varyings that live there
          * are written and in that case we can skip writing to the vue
          * header, provided the corresponding state properly clamps the
          * values further down the pipeline. */
         if ((vue_map->slots_valid & psiz_mask) == 0) {
            assert(length == 0);
            urb_offset++;
            break;
         }

         fs_reg zero(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
         bld.MOV(zero, fs_reg(0u));

         sources[length++] = zero;
         if (vue_map->slots_valid & VARYING_BIT_LAYER)
            sources[length++] = this->outputs[VARYING_SLOT_LAYER];
         else
            sources[length++] = zero;

         if (vue_map->slots_valid & VARYING_BIT_VIEWPORT)
            sources[length++] = this->outputs[VARYING_SLOT_VIEWPORT];
         else
            sources[length++] = zero;

         if (vue_map->slots_valid & VARYING_BIT_PSIZ)
            sources[length++] = this->outputs[VARYING_SLOT_PSIZ];
         else
            sources[length++] = zero;
         break;
      }
      case BRW_VARYING_SLOT_NDC:
      case VARYING_SLOT_EDGE:
         unreachable("unexpected scalar vs output");
         break;

      default:
         /* gl_Position is always in the vue map, but isn't always written by
          * the shader.  Other varyings (clip distances) get added to the vue
          * map but don't always get written.  In those cases, the
          * corresponding this->output[] slot will be invalid we and can skip
          * the urb write for the varying.  If we've already queued up a vue
          * slot for writing we flush a mlen 5 urb write, otherwise we just
          * advance the urb_offset.
          */
         if (varying == BRW_VARYING_SLOT_PAD ||
             this->outputs[varying].file == BAD_FILE) {
            if (length > 0)
               flush = true;
            else
               urb_offset++;
            break;
         }

         if (stage == MESA_SHADER_VERTEX && vs_key->clamp_vertex_color &&
             (varying == VARYING_SLOT_COL0 ||
              varying == VARYING_SLOT_COL1 ||
              varying == VARYING_SLOT_BFC0 ||
              varying == VARYING_SLOT_BFC1)) {
            /* We need to clamp these guys, so do a saturating MOV into a
             * temp register and use that for the payload.
             */
            for (int i = 0; i < 4; i++) {
               fs_reg reg = fs_reg(VGRF, alloc.allocate(1), outputs[varying].type);
               fs_reg src = offset(this->outputs[varying], bld, i);
               set_saturate(true, bld.MOV(reg, src));
               sources[length++] = reg;
            }
         } else {
            for (unsigned i = 0; i < output_components[varying]; i++)
               sources[length++] = offset(this->outputs[varying], bld, i);
            for (unsigned i = output_components[varying]; i < 4; i++)
               sources[length++] = fs_reg(0);
         }
         break;
      }

      const fs_builder abld = bld.annotate("URB write");

      /* If we've queued up 8 registers of payload (2 VUE slots), if this is
       * the last slot or if we need to flush (see BAD_FILE varying case
       * above), emit a URB write send now to flush out the data.
       */
      int last = slot == vue_map->num_slots - 1;
      if (length == 8 || last)
         flush = true;
      if (flush) {
         fs_reg *payload_sources =
            ralloc_array(mem_ctx, fs_reg, length + header_size);
         fs_reg payload = fs_reg(VGRF, alloc.allocate(length + header_size),
                                 BRW_REGISTER_TYPE_F);
         payload_sources[0] =
            fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));

         if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT)
            payload_sources[1] = per_slot_offsets;

         memcpy(&payload_sources[header_size], sources,
                length * sizeof sources[0]);

         abld.LOAD_PAYLOAD(payload, payload_sources, length + header_size,
                           header_size);

         fs_inst *inst = abld.emit(opcode, reg_undef, payload);
         inst->eot = last && stage == MESA_SHADER_VERTEX;
         inst->mlen = length + header_size;
         inst->offset = urb_offset;
         urb_offset = starting_urb_offset + slot + 1;
         length = 0;
         flush = false;
      }
   }
}