Пример #1
1
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            break;
         }
      }

      if (!is_64bit) {
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
Пример #2
0
void
vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
                                       unsigned base_offset,
                                       unsigned first_component,
                                       const src_reg &indirect_offset)
{
   vec4_instruction *inst;

   /* Set up the message header to reference the proper parts of the URB */
   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
   inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
               brw_imm_ud(dst.writemask << first_component), indirect_offset);
   inst->force_writemask_all = true;

   vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
   read->offset = base_offset;
   read->mlen = 1;
   read->base_mrf = -1;

   if (first_component) {
      /* Read into a temporary and copy with a swizzle and writemask. */
      read->dst = retype(dst_reg(this, glsl_type::ivec4_type), dst.type);
      emit(MOV(dst, swizzle(src_reg(read->dst),
                            BRW_SWZ_COMP_INPUT(first_component))));
   }
}
Пример #3
0
void
vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
                                      const src_reg &vertex_index,
                                      unsigned base_offset,
                                      unsigned first_component,
                                      const src_reg &indirect_offset)
{
   vec4_instruction *inst;
   dst_reg temp(this, glsl_type::ivec4_type);
   temp.type = dst.type;

   /* Set up the message header to reference the proper parts of the URB */
   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
   inst = emit(TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
               indirect_offset);
   inst->force_writemask_all = true;

   /* Read into a temporary, ignoring writemasking. */
   inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
   inst->offset = base_offset;
   inst->mlen = 1;
   inst->base_mrf = -1;

   /* Copy the temporary to the destination to deal with writemasking.
    *
    * Also attempt to deal with gl_PointSize being in the .w component.
    */
   if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
      emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
   } else {
      src_reg src = src_reg(temp);
      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
      emit(MOV(dst, src));
   }
}
Пример #4
0
void
vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   dst_reg dest;
   src_reg src;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input: {
      /* The EmitNoIndirectInput flag guarantees our vertex index will
       * be constant.  We should handle indirects someday.
       */
      nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
      nir_const_value *offset = nir_src_as_const_value(instr->src[1]);

      /* Make up a type...we have no way of knowing... */
      const glsl_type *const type = glsl_type::ivec(instr->num_components);

      src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
                          instr->const_index[0] + offset->u32[0],
                    type);
      src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));

      /* gl_PointSize is passed in the .w component of the VUE header */
      if (instr->const_index[0] == VARYING_SLOT_PSIZ)
         src.swizzle = BRW_SWIZZLE_WWWW;

      dest = get_nir_dest(instr->dest, src.type);
      dest.writemask = brw_writemask_for_size(instr->num_components);
      emit(MOV(dest, src));
      break;
   }

   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should have produced per_vertex intrinsics");

   case nir_intrinsic_emit_vertex_with_counter: {
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      int stream_id = instr->const_index[0];
      gs_emit_vertex(stream_id);
      break;
   }

   case nir_intrinsic_end_primitive_with_counter:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      gs_end_primitive();
      break;

   case nir_intrinsic_set_vertex_count:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      break;

   case nir_intrinsic_load_primitive_id:
      assert(gs_prog_data->include_primitive_id);
      dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
      break;

   case nir_intrinsic_load_invocation_id: {
      src_reg invocation_id =
         src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
      assert(invocation_id.file != BAD_FILE);
      dest = get_nir_dest(instr->dest, invocation_id.type);
      emit(MOV(dest, invocation_id));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}