示例#1
1
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            break;
         }
      }

      if (!is_64bit) {
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
示例#2
0
void
vec4_vs_visitor::emit_thread_end()
{
   setup_uniform_clipplane_values();

   /* Lower legacy ff and ClipVertex clipping to clip distances */
   if (key->nr_userclip_plane_consts > 0) {
      current_annotation = "user clip distances";

      output_reg[VARYING_SLOT_CLIP_DIST0][0] =
         dst_reg(this, glsl_type::vec4_type);
      output_reg[VARYING_SLOT_CLIP_DIST1][0] =
         dst_reg(this, glsl_type::vec4_type);
      output_num_components[VARYING_SLOT_CLIP_DIST0][0] = 4;
      output_num_components[VARYING_SLOT_CLIP_DIST1][0] = 4;

      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0][0], 0);
      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1][0], 4);
   }

   /* For VS, we always end the thread by emitting a single vertex.
    * emit_urb_write_opcode() will take care of setting the eot flag on the
    * SEND instruction.
    */
   emit_vertex();
}
示例#3
0
void
vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
                                       unsigned base_offset,
                                       unsigned first_component,
                                       const src_reg &indirect_offset)
{
   vec4_instruction *inst;

   /* Set up the message header to reference the proper parts of the URB */
   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
   inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
               brw_imm_ud(dst.writemask << first_component), indirect_offset);
   inst->force_writemask_all = true;

   vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
   read->offset = base_offset;
   read->mlen = 1;
   read->base_mrf = -1;

   if (first_component) {
      /* Read into a temporary and copy with a swizzle and writemask. */
      read->dst = retype(dst_reg(this, glsl_type::ivec4_type), dst.type);
      emit(MOV(dst, swizzle(src_reg(read->dst),
                            BRW_SWZ_COMP_INPUT(first_component))));
   }
}
示例#4
0
void
vec4_gs_visitor::gs_end_primitive()
{
    /* We can only do EndPrimitive() functionality when the control data
     * consists of cut bits.  Fortunately, the only time it isn't is when the
     * output type is points, in which case EndPrimitive() is a no-op.
     */
    if (gs_prog_data->control_data_format !=
            GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
        return;
    }

    if (c->control_data_header_size_bits == 0)
        return;

    /* Cut bits use one bit per vertex. */
    assert(c->control_data_bits_per_vertex == 1);

    /* Cut bit n should be set to 1 if EndPrimitive() was called after emitting
     * vertex n, 0 otherwise.  So all we need to do here is mark bit
     * (vertex_count - 1) % 32 in the cut_bits register to indicate that
     * EndPrimitive() was called after emitting vertex (vertex_count - 1);
     * vec4_gs_visitor::emit_control_data_bits() will take care of the rest.
     *
     * Note that if EndPrimitve() is called before emitting any vertices, this
     * will cause us to set bit 31 of the control_data_bits register to 1.
     * That's fine because:
     *
     * - If max_vertices < 32, then vertex number 31 (zero-based) will never be
     *   output, so the hardware will ignore cut bit 31.
     *
     * - If max_vertices == 32, then vertex number 31 is guaranteed to be the
     *   last vertex, so setting cut bit 31 has no effect (since the primitive
     *   is automatically ended when the GS terminates).
     *
     * - If max_vertices > 32, then the ir_emit_vertex visitor will reset the
     *   control_data_bits register to 0 when the first vertex is emitted.
     */

    /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
    src_reg one(this, glsl_type::uint_type);
    emit(MOV(dst_reg(one), brw_imm_ud(1u)));
    src_reg prev_count(this, glsl_type::uint_type);
    emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
    src_reg mask(this, glsl_type::uint_type);
    /* Note: we're relying on the fact that the GEN SHL instruction only pays
     * attention to the lower 5 bits of its second source argument, so on this
     * architecture, 1 << (vertex_count - 1) is equivalent to 1 <<
     * ((vertex_count - 1) % 32).
     */
    emit(SHL(dst_reg(mask), one, prev_count));
    emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
示例#5
0
void
gen6_gs_visitor::visit(ir_end_primitive *)
{
   this->current_annotation = "gen6 end primitive";
   /* Calling EndPrimitive() is optional for point output. In this case we set
    * the PrimEnd flag when we process EmitVertex().
    */
   if (c->gp->program.OutputType == GL_POINTS)
      return;

   /* Otherwise we know that the last vertex we have processed was the last
    * vertex in the primitive and we need to set its PrimEnd flag, so do this
    * unless we haven't emitted that vertex at all (vertex_count != 0).
    *
    * Notice that we have already incremented vertex_count when we processed
    * the last emit_vertex, so we need to take that into account in the
    * comparison below (hence the num_output_vertices + 1 in the comparison
    * below).
    */
   unsigned num_output_vertices = c->gp->program.VerticesOut;
   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
            BRW_CONDITIONAL_L));
   vec4_instruction *inst = emit(CMP(dst_null_d(),
                                     this->vertex_count, 0u,
                                     BRW_CONDITIONAL_NEQ));
   inst->predicate = BRW_PREDICATE_NORMAL;
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      /* vertex_output_offset is already pointing at the first entry of the
       * next vertex. So subtract 1 to modify the flags for the previous
       * vertex.
       */
      src_reg offset(this, glsl_type::uint_type);
      emit(ADD(dst_reg(offset), this->vertex_output_offset, src_reg(-1)));

      src_reg dst(this->vertex_output);
      dst.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(dst.reladdr, &offset, sizeof(src_reg));

      emit(OR(dst_reg(dst), dst, URB_WRITE_PRIM_END));
      emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));

      /* Set the first vertex flag to indicate that the next vertex will start
       * a primitive.
       */
      emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
   }
   emit(BRW_OPCODE_ENDIF);
}
示例#6
0
void
vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
                                      const src_reg &vertex_index,
                                      unsigned base_offset,
                                      unsigned first_component,
                                      const src_reg &indirect_offset)
{
   vec4_instruction *inst;
   dst_reg temp(this, glsl_type::ivec4_type);
   temp.type = dst.type;

   /* Set up the message header to reference the proper parts of the URB */
   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
   inst = emit(TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
               indirect_offset);
   inst->force_writemask_all = true;

   /* Read into a temporary, ignoring writemasking. */
   inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
   inst->offset = base_offset;
   inst->mlen = 1;
   inst->base_mrf = -1;

   /* Copy the temporary to the destination to deal with writemasking.
    *
    * Also attempt to deal with gl_PointSize being in the .w component.
    */
   if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
      emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
   } else {
      src_reg src = src_reg(temp);
      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
      emit(MOV(dst, src));
   }
}
示例#7
0
void
vec4_tes_visitor::emit_prolog()
{
   input_read_header = src_reg(this, glsl_type::uvec4_type);
   emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));

   this->current_annotation = NULL;
}
示例#8
0
void
vec4_tcs_visitor::emit_thread_end()
{
   vec4_instruction *inst;
   current_annotation = "thread end";

   if (nir->info->tcs.vertices_out % 2) {
      emit(BRW_OPCODE_ENDIF);
   }

   if (devinfo->gen == 7) {
      struct brw_tcs_prog_data *tcs_prog_data =
         (struct brw_tcs_prog_data *) prog_data;

      current_annotation = "release input vertices";

      /* Synchronize all threads, so we know that no one is still
       * using the input URB handles.
       */
      if (tcs_prog_data->instances > 1) {
         dst_reg header = dst_reg(this, glsl_type::uvec4_type);
         emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
         emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
      }

      /* Make thread 0 (invocations <1, 0>) release pairs of ICP handles.
       * We want to compare the bottom half of invocation_id with 0, but
       * use that truth value for the top half as well.  Unfortunately,
       * we don't have stride in the vec4 world, nor UV immediates in
       * align16, so we need an opcode to get invocation_id<0,4,0>.
       */
      set_condmod(BRW_CONDITIONAL_Z,
                  emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(),
                       invocation_id));
      emit(IF(BRW_PREDICATE_NORMAL));
      for (unsigned i = 0; i < key->input_vertices; i += 2) {
         /* If we have an odd number of input vertices, the last will be
          * unpaired.  We don't want to use an interleaved URB write in
          * that case.
          */
         const bool is_unpaired = i == key->input_vertices - 1;

         dst_reg header(this, glsl_type::uvec4_type);
         emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i),
              brw_imm_ud(is_unpaired));
      }
      emit(BRW_OPCODE_ENDIF);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
      emit_shader_time_end();

   inst = emit(TCS_OPCODE_THREAD_END);
   inst->base_mrf = 14;
   inst->mlen = 2;
}
示例#9
0
void
vec4_gs_visitor::emit_prolog()
{
    /* In vertex shaders, r0.2 is guaranteed to be initialized to zero.  In
     * geometry shaders, it isn't (it contains a bunch of information we don't
     * need, like the input primitive type).  We need r0.2 to be zero in order
     * to build scratch read/write messages correctly (otherwise this value
     * will be interpreted as a global offset, causing us to do our scratch
     * reads/writes to garbage memory).  So just set it to zero at the top of
     * the shader.
     */
    this->current_annotation = "clear r0.2";
    dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
    vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u));
    inst->force_writemask_all = true;

    /* Create a virtual register to hold the vertex count */
    this->vertex_count = src_reg(this, glsl_type::uint_type);

    /* Initialize the vertex_count register to 0 */
    this->current_annotation = "initialize vertex_count";
    inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u)));
    inst->force_writemask_all = true;

    if (c->control_data_header_size_bits > 0) {
        /* Create a virtual register to hold the current set of control data
         * bits.
         */
        this->control_data_bits = src_reg(this, glsl_type::uint_type);

        /* If we're outputting more than 32 control data bits, then EmitVertex()
         * will set control_data_bits to 0 after emitting the first vertex.
         * Otherwise, we need to initialize it to 0 here.
         */
        if (c->control_data_header_size_bits <= 32) {
            this->current_annotation = "initialize control data bits";
            inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
            inst->force_writemask_all = true;
        }
    }

    this->current_annotation = NULL;
}
示例#10
0
文件: brw_wm_fp.c 项目: nikai3d/mesa
static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
{
   int bit = ffs( ~c->fp_temp );

   if (!bit) {
      debug_printf("%s: out of temporaries\n", __FILE__);
   }

   c->fp_temp |= 1<<(bit-1);
   return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
}
示例#11
0
static struct prog_dst_register get_temp( struct brw_wm_compile *c )
{
   int bit = _mesa_ffs( ~c->fp_temp );

   if (!bit) {
      printf("%s: out of temporaries\n", __FILE__);
      exit(1);
   }

   c->fp_temp |= 1<<(bit-1);
   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
}
示例#12
0
void
vec4_vs_visitor::setup_uniform_clipplane_values()
{
   for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
      this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
      this->userplane[i].type = BRW_REGISTER_TYPE_F;
      for (int j = 0; j < 4; ++j) {
         stage_prog_data->param[this->uniforms * 4 + j] =
            (gl_constant_value *) &clip_planes[i][j];
      }
      ++this->uniforms;
   }
}
示例#13
0
void
gen6_gs_visitor::emit_urb_write_header(int mrf)
{
   this->current_annotation = "gen6 urb header";
   /* Compute offset of the flags for the current vertex in vertex_output and
    * write them in dw2 of the message header.
    *
    * Notice that by the time that emit_thread_end() calls here
    * vertex_output_offset should point to the first data item of the current
    * vertex in vertex_output, thus we only need to add the number of output
    * slots per vertex to that offset to obtain the flags data offset.
    */
   src_reg flags_offset(this, glsl_type::uint_type);
   emit(ADD(dst_reg(flags_offset),
            this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots)));

   src_reg flags_data(this->vertex_output);
   flags_data.reladdr = ralloc(mem_ctx, src_reg);
   memcpy(flags_data.reladdr, &flags_offset, sizeof(src_reg));

   emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
}
示例#14
0
void
vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
{
   /* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */

   /* Note: we are calling this *before* increasing vertex_count, so
    * this->vertex_count == vertex_count - 1 in the formula above.
    */

   /* Stream mode uses 2 bits per vertex */
   assert(c->control_data_bits_per_vertex == 2);

   /* Must be a valid stream */
   assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);

   /* Control data bits are initialized to 0 so we don't have to set any
    * bits when sending vertices to stream 0.
    */
   if (stream_id == 0)
      return;

   /* reg::sid = stream_id */
   src_reg sid(this, glsl_type::uint_type);
   emit(MOV(dst_reg(sid), stream_id));

   /* reg:shift_count = 2 * (vertex_count - 1) */
   src_reg shift_count(this, glsl_type::uint_type);
   emit(SHL(dst_reg(shift_count), this->vertex_count, 1u));

   /* Note: we're relying on the fact that the GEN SHL instruction only pays
    * attention to the lower 5 bits of its second source argument, so on this
    * architecture, stream_id << 2 * (vertex_count - 1) is equivalent to
    * stream_id << ((2 * (vertex_count - 1)) % 32).
    */
   src_reg mask(this, glsl_type::uint_type);
   emit(SHL(dst_reg(mask), sid, shift_count));
   emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
示例#15
0
void
vec4_tcs_visitor::emit_urb_write(const src_reg &value,
                                 unsigned writemask,
                                 unsigned base_offset,
                                 const src_reg &indirect_offset)
{
   if (writemask == 0)
      return;

   src_reg message(this, glsl_type::uvec4_type, 2);
   vec4_instruction *inst;

   inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
               brw_imm_ud(writemask), indirect_offset);
   inst->force_writemask_all = true;
   inst = emit(MOV(offset(dst_reg(retype(message, value.type)), 1), value));
   inst->force_writemask_all = true;

   inst = emit(TCS_OPCODE_URB_WRITE, dst_null_f(), message);
   inst->offset = base_offset;
   inst->mlen = 2;
   inst->base_mrf = -1;
}
示例#16
0
static void fog_blend( struct brw_wm_compile *c,
			     struct prog_src_register fog_factor )
{
   struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
   struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 );

   /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
   
   emit_op(c, 
	   OPCODE_LRP,
	   dst_mask(outcolor, WRITEMASK_XYZ),
	   0, 0, 0,
	   fog_factor,
	   src_reg_from_dst(outcolor),
	   fogcolor);
}
示例#17
0
void
vec4_tcs_visitor::emit_prolog()
{
   invocation_id = src_reg(this, glsl_type::uint_type);
   emit(TCS_OPCODE_GET_INSTANCE_ID, dst_reg(invocation_id));

   /* HS threads are dispatched with the dispatch mask set to 0xFF.
    * If there are an odd number of output vertices, then the final
    * HS instance dispatched will only have its bottom half doing real
    * work, and so we need to disable the upper half:
    */
   if (nir->info->tcs.vertices_out % 2) {
      emit(CMP(dst_null_d(), invocation_id,
               brw_imm_ud(nir->info->tcs.vertices_out), BRW_CONDITIONAL_L));

      /* Matching ENDIF is in emit_thread_end() */
      emit(IF(BRW_PREDICATE_NORMAL));
   }
}
示例#18
0
void
vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
                                       unsigned base_offset,
                                       const src_reg &indirect_offset)
{
   vec4_instruction *inst;

   /* Set up the message header to reference the proper parts of the URB */
   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
   inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
               brw_imm_ud(dst.writemask), indirect_offset);
   inst->force_writemask_all = true;

   /* Read into a temporary, ignoring writemasking. */
   vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
   read->offset = base_offset;
   read->mlen = 1;
   read->base_mrf = -1;
}
示例#19
0
void
gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
                                       int last_mrf, int urb_offset)
{
   vec4_instruction *inst = NULL;

   if (!complete) {
      /* If the vertex is not complete we don't have to do anything special */
      inst = emit(GS_OPCODE_URB_WRITE);
      inst->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
   } else {
      /* Otherwise we always request to allocate a new VUE handle. If this is
       * the last write before the EOT message and the new handle never gets
       * used it will be dereferenced when we send the EOT message. This is
       * necessary to avoid different setups for the EOT message (one for the
       * case when there is no output and another for the case when there is)
       * which would require to end the program with an IF/ELSE/ENDIF block,
       * something we do not want.
       */
      inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
      inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
      inst->dst = dst_reg(MRF, base_mrf);
      inst->src[0] = this->temp;
   }

   inst->base_mrf = base_mrf;
   /* URB data written (does not include the message header reg) must
    * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
    * section 5.4.3.2.2: URB_INTERLEAVED.
    */
   int mlen = last_mrf - base_mrf;
   if ((mlen % 2) != 1)
      mlen++;
   inst->mlen = mlen;
   inst->offset = urb_offset;
}
示例#20
0
void
vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_invocation_id:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD),
               invocation_id));
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TCS_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
               brw_imm_d(key->input_vertices)));
      break;
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
      src_reg vertex_index =
         vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0]))
                      : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      emit_input_urb_read(dst, vertex_index, imm_offset,
                          nir_intrinsic_component(instr), indirect_offset);
      break;
   }
   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;

         /* This is a read of gl_TessLevelInner[], which lives in the
          * Patch URB header.  The layout depends on the domain.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS: {
            /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
            dst_reg tmp(this, glsl_type::vec4_type);
            emit_output_urb_read(tmp, 0, 0, src_reg());
            emit(MOV(writemask(dst, WRITEMASK_XY),
                     swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
            break;
         }
         case GL_TRIANGLES:
            /* DWord 4; use offset 1 but normal swizzle/writemask. */
            emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0,
                                 src_reg());
            break;
         case GL_ISOLINES:
            /* All channels are undefined. */
            return;
         default:
            unreachable("Bogus tessellation domain");
         }
      } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;
         unsigned swiz = BRW_SWIZZLE_WZYX;

         /* This is a read of gl_TessLevelOuter[], which lives in the
          * high 4 DWords of the Patch URB header, in reverse order.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS:
            dst.writemask = WRITEMASK_XYZW;
            break;
         case GL_TRIANGLES:
            dst.writemask = WRITEMASK_XYZ;
            break;
         case GL_ISOLINES:
            /* Isolines are not reversed; swizzle .zw -> .xy */
            swiz = BRW_SWIZZLE_ZWZW;
            dst.writemask = WRITEMASK_XY;
            return;
         default:
            unreachable("Bogus tessellation domain");
         }

         dst_reg tmp(this, glsl_type::vec4_type);
         emit_output_urb_read(tmp, 1, 0, src_reg());
         emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
      } else {
         emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
                              indirect_offset);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      src_reg value = get_nir_src(instr->src[0]);
      unsigned mask = instr->const_index[1];
      unsigned swiz = BRW_SWIZZLE_XYZW;

      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      /* The passthrough shader writes the whole patch header as two vec4s;
       * skip all the gl_TessLevelInner/Outer swizzling.
       */
      if (indirect_offset.file == BAD_FILE && !is_passthrough_shader) {
         if (imm_offset == 0) {
            value.type = BRW_REGISTER_TYPE_F;

            mask &=
               (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;

            /* This is a write to gl_TessLevelInner[], which lives in the
             * Patch URB header.  The layout depends on the domain.
             */
            switch (key->tes_primitive_mode) {
            case GL_QUADS:
               /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
                * We use an XXYX swizzle to reverse put .xy in the .wz
                * channels, and use a .zw writemask.
                */
               swiz = BRW_SWIZZLE4(0, 0, 1, 0);
               mask = writemask_for_backwards_vector(mask);
               break;
            case GL_TRIANGLES:
               /* gl_TessLevelInner[].x lives at DWord 4, so we set the
                * writemask to X and bump the URB offset by 1.
                */
               imm_offset = 1;
               break;
            case GL_ISOLINES:
               /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
               return;
            default:
               unreachable("Bogus tessellation domain");
            }
         } else if (imm_offset == 1) {
            value.type = BRW_REGISTER_TYPE_F;

            mask &=
               (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;

            /* This is a write to gl_TessLevelOuter[] which lives in the
             * Patch URB Header at DWords 4-7.  However, it's reversed, so
             * instead of .xyzw we have .wzyx.
             */
            if (key->tes_primitive_mode == GL_ISOLINES) {
               /* Isolines .xy should be stored in .zw, in order. */
               swiz = BRW_SWIZZLE4(0, 0, 0, 1);
               mask <<= 2;
            } else {
               /* Other domains are reversed; store .wzyx instead of .xyzw. */
               swiz = BRW_SWIZZLE_WZYX;
               mask = writemask_for_backwards_vector(mask);
            }
         }
      }

      unsigned first_component = nir_intrinsic_component(instr);
      if (first_component) {
         assert(swiz == BRW_SWIZZLE_XYZW);
         swiz = BRW_SWZ_COMP_OUTPUT(first_component);
         mask = mask << first_component;
      }

      emit_urb_write(swizzle(value, swiz), mask,
                     imm_offset, indirect_offset);
      break;
   }

   case nir_intrinsic_barrier: {
      dst_reg header = dst_reg(this, glsl_type::uvec4_type);
      emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
      emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
示例#21
0
static void emit_interp( struct brw_wm_compile *c,
			 GLuint idx )
{
   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
   struct prog_src_register deltas = get_delta_xy(c);
   struct prog_src_register arg2;
   GLuint opcode;
   
   /* Need to use PINTERP on attributes which have been
    * multiplied by 1/W in the SF program, and LINTERP on those
    * which have not:
    */
   switch (idx) {
   case FRAG_ATTRIB_WPOS:
      opcode = WM_LINTERP;
      arg2 = src_undef();

      /* Have to treat wpos.xy specially:
       */
      emit_op(c,
	      WM_WPOSXY,
	      dst_mask(dst, WRITEMASK_XY),
	      0, 0, 0,
	      get_pixel_xy(c),
	      src_undef(),
	      src_undef());
      
      dst = dst_mask(dst, WRITEMASK_ZW);

      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
       */
      emit_op(c,
	      WM_LINTERP,
	      dst,
	      0, 0, 0,
	      interp,
	      deltas,
	      arg2);
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      if (c->key.flat_shade) {
	 emit_op(c,
		 WM_CINTERP,
		 dst,
		 0, 0, 0,
		 interp,
		 src_undef(),
		 src_undef());
      }
      else {
	 emit_op(c,
		 WM_LINTERP,
		 dst,
		 0, 0, 0,
		 interp,
		 deltas,
		 src_undef());
      }
      break;
   default:
      emit_op(c,
	      WM_PINTERP,
	      dst,
	      0, 0, 0,
	      interp,
	      deltas,
	      get_pixel_w(c));
      break;
   }

   c->fp_interp_emitted |= 1<<idx;
}
示例#22
0
static void emit_interp( struct brw_wm_compile *c,
			 GLuint idx )
{
   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
   struct prog_src_register deltas = get_delta_xy(c);

   /* Need to use PINTERP on attributes which have been
    * multiplied by 1/W in the SF program, and LINTERP on those
    * which have not:
    */
   switch (idx) {
   case FRAG_ATTRIB_WPOS:
      /* Have to treat wpos.xy specially:
       */
      emit_op(c,
	      WM_WPOSXY,
	      dst_mask(dst, WRITEMASK_XY),
	      0,
	      get_pixel_xy(c),
	      src_undef(),
	      src_undef());
      
      dst = dst_mask(dst, WRITEMASK_ZW);

      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
       */
      emit_op(c,
	      WM_LINTERP,
	      dst,
	      0,
	      interp,
	      deltas,
	      src_undef());
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      if (c->key.flat_shade) {
	 emit_op(c,
		 WM_CINTERP,
		 dst,
		 0,
		 interp,
		 src_undef(),
		 src_undef());
      }
      else {
         if (c->key.linear_color) {
            emit_op(c,
                    WM_LINTERP,
                    dst,
                    0,
                    interp,
                    deltas,
                    src_undef());
         }
         else {
            /* perspective-corrected color interpolation */
            emit_op(c,
                    WM_PINTERP,
                    dst,
                    0,
                    interp,
                    deltas,
                    get_pixel_w(c));
         }
      }
      break;
   case FRAG_ATTRIB_FOGC:
      /* Interpolate the fog coordinate */
      emit_op(c,
	      WM_PINTERP,
	      dst_mask(dst, WRITEMASK_X),
	      0,
	      interp,
	      deltas,
	      get_pixel_w(c));

      emit_op(c,
	      OPCODE_MOV,
	      dst_mask(dst, WRITEMASK_YZW),
	      0,
	      src_swizzle(interp,
			  SWIZZLE_ZERO,
			  SWIZZLE_ZERO,
			  SWIZZLE_ZERO,
			  SWIZZLE_ONE),
	      src_undef(),
	      src_undef());
      break;

   case FRAG_ATTRIB_FACE:
      emit_op(c,
              WM_FRONTFACING,
              dst_mask(dst, WRITEMASK_X),
              0,
              src_undef(),
              src_undef(),
              src_undef());
      break;

   case FRAG_ATTRIB_PNTC:
      /* XXX review/test this case */
      emit_op(c,
	      WM_PINTERP,
	      dst_mask(dst, WRITEMASK_XY),
	      0,
	      interp,
	      deltas,
	      get_pixel_w(c));

      emit_op(c,
	      OPCODE_MOV,
	      dst_mask(dst, WRITEMASK_ZW),
	      0,
	      src_swizzle(interp,
			  SWIZZLE_ZERO,
			  SWIZZLE_ZERO,
			  SWIZZLE_ZERO,
			  SWIZZLE_ONE),
	      src_undef(),
	      src_undef());
      break;

   default:
      emit_op(c,
	      WM_PINTERP,
	      dst,
	      0,
	      interp,
	      deltas,
	      get_pixel_w(c));
      break;
   }

   c->fp_interp_emitted |= 1<<idx;
}
示例#23
0
static struct prog_dst_register dst_undef( void )
{
   return dst_reg(PROGRAM_UNDEFINED, 0);
}
示例#24
0
void
vec4_gs_visitor::visit(ir_emit_vertex *ir)
{
   this->current_annotation = "emit vertex: safety check";

   /* To ensure that we don't output more vertices than the shader specified
    * using max_vertices, do the logic inside a conditional of the form "if
    * (vertex_count < MAX)"
    */
   unsigned num_output_vertices = c->gp->program.VerticesOut;
   emit(CMP(dst_null_d(), this->vertex_count,
            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      /* If we're outputting 32 control data bits or less, then we can wait
       * until the shader is over to output them all.  Otherwise we need to
       * output them as we go.  Now is the time to do it, since we're about to
       * output the vertex_count'th vertex, so it's guaranteed that the
       * control data bits associated with the (vertex_count - 1)th vertex are
       * correct.
       */
      if (c->control_data_header_size_bits > 32) {
         this->current_annotation = "emit vertex: emit control data bits";
         /* Only emit control data bits if we've finished accumulating a batch
          * of 32 bits.  This is the case when:
          *
          *     (vertex_count * bits_per_vertex) % 32 == 0
          *
          * (in other words, when the last 5 bits of vertex_count *
          * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
          * integer n (which is always the case, since bits_per_vertex is
          * always 1 or 2), this is equivalent to requiring that the last 5-n
          * bits of vertex_count are 0:
          *
          *     vertex_count & (2^(5-n) - 1) == 0
          *
          * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
          * equivalent to:
          *
          *     vertex_count & (32 / bits_per_vertex - 1) == 0
          */
         vec4_instruction *inst =
            emit(AND(dst_null_d(), this->vertex_count,
                     (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         emit(IF(BRW_PREDICATE_NORMAL));
         {
            emit_control_data_bits();

            /* Reset control_data_bits to 0 so we can start accumulating a new
             * batch.
             *
             * Note: in the case where vertex_count == 0, this neutralizes the
             * effect of any call to EndPrimitive() that the shader may have
             * made before outputting its first vertex.
             */
            inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
            inst->force_writemask_all = true;
         }
         emit(BRW_OPCODE_ENDIF);
      }

      this->current_annotation = "emit vertex: vertex data";
      emit_vertex();

      /* In stream mode we have to set control data bits for all vertices
       * unless we have disabled control data bits completely (which we do
       * do for GL_POINTS outputs that don't use streams).
       */
      if (c->control_data_header_size_bits > 0 &&
          c->prog_data.control_data_format ==
             GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
          this->current_annotation = "emit vertex: Stream control data bits";
          set_stream_control_data_bits(ir->stream_id());
      }

      this->current_annotation = "emit vertex: increment vertex count";
      emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
               src_reg(1u)));
   }
   emit(BRW_OPCODE_ENDIF);

   this->current_annotation = NULL;
}
示例#25
0
文件: brw_wm_fp.c 项目: nikai3d/mesa
static void emit_interp( struct brw_wm_compile *c,
			 GLuint idx,
			 GLuint semantic,
			 GLuint interp_mode )
{
   struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
   struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
   struct brw_fp_src deltas = get_delta_xy(c);

   /* Need to use PINTERP on attributes which have been
    * multiplied by 1/W in the SF program, and LINTERP on those
    * which have not:
    */
   switch (semantic) {
   case TGSI_SEMANTIC_POSITION:
      /* Have to treat wpos.xy specially:
       */
      emit_op1(c,
	      WM_WPOSXY,
	      dst_mask(dst, BRW_WRITEMASK_XY),
	      get_pixel_xy(c));
      
      /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
       */
      emit_op2(c,
	       WM_LINTERP,
	       dst_mask(dst, BRW_WRITEMASK_ZW),
	       interp,
	       deltas);
      break;

   case TGSI_SEMANTIC_COLOR:
      if (c->key.flat_shade) {
	 emit_op1(c,
		 WM_CINTERP,
		 dst,
		 interp);
      }
      else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
	 emit_op2(c,
		  WM_LINTERP,
		  dst,
		  interp,
		  deltas);
      }
      else {
	 emit_op3(c,
		  WM_PINTERP,
		  dst,
		  interp,
		  deltas,
		  get_pixel_w(c));
      }

      break;

   case TGSI_SEMANTIC_FOG:
      /* Interpolate the fog coordinate */
      emit_op3(c,
	      WM_PINTERP,
	      dst_mask(dst, BRW_WRITEMASK_X),
	      interp,
	      deltas,
	      get_pixel_w(c));

      emit_op1(c,
	       TGSI_OPCODE_MOV,
	       dst_mask(dst, BRW_WRITEMASK_YZ),
	       src_imm1f(c, 0.0));

      emit_op1(c,
	       TGSI_OPCODE_MOV,
	       dst_mask(dst, BRW_WRITEMASK_W),
	       src_imm1f(c, 1.0));
      break;

   case TGSI_SEMANTIC_FACE:
      /* XXX review/test this case */
      emit_op0(c,
	       WM_FRONTFACING,
	       dst_mask(dst, BRW_WRITEMASK_X));
      
      emit_op1(c,
	      TGSI_OPCODE_MOV,
	      dst_mask(dst, BRW_WRITEMASK_YZ),
	       src_imm1f(c, 0.0));

      emit_op1(c,
	      TGSI_OPCODE_MOV,
	      dst_mask(dst, BRW_WRITEMASK_W),
	       src_imm1f(c, 1.0));
      break;

   case TGSI_SEMANTIC_PSIZE:
      /* XXX review/test this case */
      emit_op3(c,
	       WM_PINTERP,
	       dst_mask(dst, BRW_WRITEMASK_XY),
	       interp,
	       deltas,
	       get_pixel_w(c));

      emit_op1(c,
	      TGSI_OPCODE_MOV,
	      dst_mask(dst, BRW_WRITEMASK_Z),
	      src_imm1f(c, 0.0f));

      emit_op1(c,
	      TGSI_OPCODE_MOV,
	      dst_mask(dst, BRW_WRITEMASK_W),
	      src_imm1f(c, 1.0f));
      break;

   default: 
      switch (interp_mode) {
      case TGSI_INTERPOLATE_CONSTANT:
	 emit_op1(c,
		  WM_CINTERP,
		  dst,
		  interp);
	 break;

      case TGSI_INTERPOLATE_LINEAR:
	 emit_op2(c,
		  WM_LINTERP,
		  dst,
		  interp,
		  deltas);
	 break;

      case TGSI_INTERPOLATE_PERSPECTIVE:
	 emit_op3(c,
		  WM_PINTERP,
		  dst,
		  interp,
		  deltas,
		  get_pixel_w(c));
	 break;
      }
      break;
   }
}
示例#26
0
文件: brw_wm_fp.c 项目: nikai3d/mesa
static struct brw_fp_dst dst_undef( void )
{
   return dst_reg(TGSI_FILE_NULL, 0);
}
示例#27
0
void
vec4_gs_visitor::emit_prolog()
{
   /* In vertex shaders, r0.2 is guaranteed to be initialized to zero.  In
    * geometry shaders, it isn't (it contains a bunch of information we don't
    * need, like the input primitive type).  We need r0.2 to be zero in order
    * to build scratch read/write messages correctly (otherwise this value
    * will be interpreted as a global offset, causing us to do our scratch
    * reads/writes to garbage memory).  So just set it to zero at the top of
    * the shader.
    */
   this->current_annotation = "clear r0.2";
   dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2_IMMED, r0, 0u);
   inst->force_writemask_all = true;

   /* Create a virtual register to hold the vertex count */
   this->vertex_count = src_reg(this, glsl_type::uint_type);

   /* Initialize the vertex_count register to 0 */
   this->current_annotation = "initialize vertex_count";
   inst = emit(MOV(dst_reg(this->vertex_count), 0u));
   inst->force_writemask_all = true;

   if (c->control_data_header_size_bits > 0) {
      /* Create a virtual register to hold the current set of control data
       * bits.
       */
      this->control_data_bits = src_reg(this, glsl_type::uint_type);

      /* If we're outputting more than 32 control data bits, then EmitVertex()
       * will set control_data_bits to 0 after emitting the first vertex.
       * Otherwise, we need to initialize it to 0 here.
       */
      if (c->control_data_header_size_bits <= 32) {
         this->current_annotation = "initialize control data bits";
         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
         inst->force_writemask_all = true;
      }
   }

   /* If the geometry shader uses the gl_PointSize input, we need to fix it up
    * to account for the fact that the vertex shader stored it in the w
    * component of VARYING_SLOT_PSIZ.
    */
   if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) {
      this->current_annotation = "swizzle gl_PointSize input";
      for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) {
         dst_reg dst(ATTR,
                     BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
         dst.type = BRW_REGISTER_TYPE_F;
         src_reg src(dst);
         dst.writemask = WRITEMASK_X;
         src.swizzle = BRW_SWIZZLE_WWWW;
         inst = emit(MOV(dst, src));

         /* In dual instanced dispatch mode, dst has a width of 4, so we need
          * to make sure the MOV happens regardless of which channels are
          * enabled.
          */
         inst->force_writemask_all = true;
      }
   }

   this->current_annotation = NULL;
}
示例#28
0
void
vec4_gs_visitor::gs_emit_vertex(int stream_id)
{
    this->current_annotation = "emit vertex: safety check";

    /* Haswell and later hardware ignores the "Render Stream Select" bits
     * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled,
     * and instead sends all primitives down the pipeline for rasterization.
     * If the SOL stage is enabled, "Render Stream Select" is honored and
     * primitives bound to non-zero streams are discarded after stream output.
     *
     * Since the only purpose of primives sent to non-zero streams is to
     * be recorded by transform feedback, we can simply discard all geometry
     * bound to these streams when transform feedback is disabled.
     */
    if (stream_id > 0 && !nir->info->has_transform_feedback_varyings)
        return;

    /* If we're outputting 32 control data bits or less, then we can wait
     * until the shader is over to output them all.  Otherwise we need to
     * output them as we go.  Now is the time to do it, since we're about to
     * output the vertex_count'th vertex, so it's guaranteed that the
     * control data bits associated with the (vertex_count - 1)th vertex are
     * correct.
     */
    if (c->control_data_header_size_bits > 32) {
        this->current_annotation = "emit vertex: emit control data bits";
        /* Only emit control data bits if we've finished accumulating a batch
         * of 32 bits.  This is the case when:
         *
         *     (vertex_count * bits_per_vertex) % 32 == 0
         *
         * (in other words, when the last 5 bits of vertex_count *
         * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
         * integer n (which is always the case, since bits_per_vertex is
         * always 1 or 2), this is equivalent to requiring that the last 5-n
         * bits of vertex_count are 0:
         *
         *     vertex_count & (2^(5-n) - 1) == 0
         *
         * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
         * equivalent to:
         *
         *     vertex_count & (32 / bits_per_vertex - 1) == 0
         */
        vec4_instruction *inst =
            emit(AND(dst_null_ud(), this->vertex_count,
                     brw_imm_ud(32 / c->control_data_bits_per_vertex - 1)));
        inst->conditional_mod = BRW_CONDITIONAL_Z;

        emit(IF(BRW_PREDICATE_NORMAL));
        {
            /* If vertex_count is 0, then no control data bits have been
             * accumulated yet, so we skip emitting them.
             */
            emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
                     BRW_CONDITIONAL_NEQ));
            emit(IF(BRW_PREDICATE_NORMAL));
            emit_control_data_bits();
            emit(BRW_OPCODE_ENDIF);

            /* Reset control_data_bits to 0 so we can start accumulating a new
             * batch.
             *
             * Note: in the case where vertex_count == 0, this neutralizes the
             * effect of any call to EndPrimitive() that the shader may have
             * made before outputting its first vertex.
             */
            inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
            inst->force_writemask_all = true;
        }
        emit(BRW_OPCODE_ENDIF);
    }

    this->current_annotation = "emit vertex: vertex data";
    emit_vertex();

    /* In stream mode we have to set control data bits for all vertices
     * unless we have disabled control data bits completely (which we do
     * do for GL_POINTS outputs that don't use streams).
     */
    if (c->control_data_header_size_bits > 0 &&
            gs_prog_data->control_data_format ==
            GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
        this->current_annotation = "emit vertex: Stream control data bits";
        set_stream_control_data_bits(stream_id);
    }

    this->current_annotation = NULL;
}
示例#29
0
void
vec4_vs_visitor::emit_prolog()
{
   dst_reg sign_recovery_shift;
   dst_reg normalize_factor;
   dst_reg es3_normalize_factor;

   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
      if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
         uint8_t wa_flags = key->gl_attrib_wa_flags[i];
         dst_reg reg(ATTR, i);
         dst_reg reg_d = reg;
         reg_d.type = BRW_REGISTER_TYPE_D;
         dst_reg reg_ud = reg;
         reg_ud.type = BRW_REGISTER_TYPE_UD;

         /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED attributes
          * come in as floating point conversions of the integer values.
          */
         if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
            dst_reg dst = reg;
            dst.type = brw_type_for_base_type(glsl_type::vec4_type);
            dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
            emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
         }

         /* Do sign recovery for 2101010 formats if required. */
         if (wa_flags & BRW_ATTRIB_WA_SIGN) {
            if (sign_recovery_shift.file == BAD_FILE) {
               /* shift constant: <22,22,22,30> */
               sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
            }

            emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
            emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
         }

         /* Apply BGRA swizzle if required. */
         if (wa_flags & BRW_ATTRIB_WA_BGRA) {
            src_reg temp = src_reg(reg);
            temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
            emit(MOV(reg, temp));
         }

         if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
            /* ES 3.0 has different rules for converting signed normalized
             * fixed-point numbers than desktop GL.
             */
            if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) {
               /* According to equation 2.2 of the ES 3.0 specification,
                * signed normalization conversion is done by:
                *
                * f = c / (2^(b-1)-1)
                */
               if (es3_normalize_factor.file == BAD_FILE) {
                  /* mul constant: 1 / (2^(b-1) - 1) */
                  es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
                  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
                           src_reg(1.0f / ((1<<9) - 1))));
                  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
                           src_reg(1.0f / ((1<<1) - 1))));
               }

               dst_reg dst = reg;
               dst.type = brw_type_for_base_type(glsl_type::vec4_type);
               emit(MOV(dst, src_reg(reg_d)));
               emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
               emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f));
            } else {
               /* The following equations are from the OpenGL 3.2 specification:
                *
                * 2.1 unsigned normalization
                * f = c/(2^n-1)
                *
                * 2.2 signed normalization
                * f = (2c+1)/(2^n-1)
                *
                * Both of these share a common divisor, which is represented by
                * "normalize_factor" in the code below.
                */
               if (normalize_factor.file == BAD_FILE) {
                  /* 1 / (2^b - 1) for b=<10,10,10,2> */
                  normalize_factor = dst_reg(this, glsl_type::vec4_type);
                  emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
                           src_reg(1.0f / ((1<<10) - 1))));
                  emit(MOV(writemask(normalize_factor, WRITEMASK_W),
                           src_reg(1.0f / ((1<<2) - 1))));
               }

               dst_reg dst = reg;
               dst.type = brw_type_for_base_type(glsl_type::vec4_type);
               emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));

               /* For signed normalization, we want the numerator to be 2c+1. */
               if (wa_flags & BRW_ATTRIB_WA_SIGN) {
                  emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
                  emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
               }

               emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
            }
         }

         if (wa_flags & BRW_ATTRIB_WA_SCALE) {
            dst_reg dst = reg;
            dst.type = brw_type_for_base_type(glsl_type::vec4_type);
            emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
         }
      }
   }
}
示例#30
0
/**
 * Write out a batch of 32 control data bits from the control_data_bits
 * register to the URB.
 *
 * The current value of the vertex_count register determines which DWORD in
 * the URB receives the control data bits.  The control_data_bits register is
 * assumed to contain the correct data for the vertex that was most recently
 * output, and all previous vertices that share the same DWORD.
 *
 * This function takes care of ensuring that if no vertices have been output
 * yet, no control bits are emitted.
 */
void
vec4_gs_visitor::emit_control_data_bits()
{
   assert(c->control_data_bits_per_vertex != 0);

   /* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized)
    * granularity, we need to use two tricks to ensure that the batch of 32
    * control data bits is written to the appropriate DWORD in the URB.  To
    * select which vec4 we are writing to, we use the "slot {0,1} offset"
    * fields of the message header.  To select which DWORD in the vec4 we are
    * writing to, we use the channel mask fields of the message header.  To
    * avoid penalizing geometry shaders that emit a small number of vertices
    * with extra bookkeeping, we only do each of these tricks when
    * c->prog_data.control_data_header_size_bits is large enough to make it
    * necessary.
    *
    * Note: this means that if we're outputting just a single DWORD of control
    * data bits, we'll actually replicate it four times since we won't do any
    * channel masking.  But that's not a problem since in this case the
    * hardware only pays attention to the first DWORD.
    */
   enum brw_urb_write_flags urb_write_flags = BRW_URB_WRITE_OWORD;
   if (c->control_data_header_size_bits > 32)
      urb_write_flags = urb_write_flags | BRW_URB_WRITE_USE_CHANNEL_MASKS;
   if (c->control_data_header_size_bits > 128)
      urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;

   /* If vertex_count is 0, then no control data bits have been accumulated
    * yet, so we should do nothing.
    */
   emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ));
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      /* If we are using either channel masks or a per-slot offset, then we
       * need to figure out which DWORD we are trying to write to, using the
       * formula:
       *
       *     dword_index = (vertex_count - 1) * bits_per_vertex / 32
       *
       * Since bits_per_vertex is a power of two, and is known at compile
       * time, this can be optimized to:
       *
       *     dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
       */
      src_reg dword_index(this, glsl_type::uint_type);
      if (urb_write_flags) {
         src_reg prev_count(this, glsl_type::uint_type);
         emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
         unsigned log2_bits_per_vertex =
            _mesa_fls(c->control_data_bits_per_vertex);
         emit(SHR(dst_reg(dword_index), prev_count,
                  (uint32_t) (6 - log2_bits_per_vertex)));
      }

      /* Start building the URB write message.  The first MRF gets a copy of
       * R0.
       */
      int base_mrf = 1;
      dst_reg mrf_reg(MRF, base_mrf);
      src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
      vec4_instruction *inst = emit(MOV(mrf_reg, r0));
      inst->force_writemask_all = true;

      if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
         /* Set the per-slot offset to dword_index / 4, to that we'll write to
          * the appropriate OWORD within the control data header.
          */
         src_reg per_slot_offset(this, glsl_type::uint_type);
         emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
         emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
      }

      if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
         /* Set the channel masks to 1 << (dword_index % 4), so that we'll
          * write to the appropriate DWORD within the OWORD.  We need to do
          * this computation with force_writemask_all, otherwise garbage data
          * from invocation 0 might clobber the mask for invocation 1 when
          * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
          * together.
          */
         src_reg channel(this, glsl_type::uint_type);
         inst = emit(AND(dst_reg(channel), dword_index, 3u));
         inst->force_writemask_all = true;
         src_reg one(this, glsl_type::uint_type);
         inst = emit(MOV(dst_reg(one), 1u));
         inst->force_writemask_all = true;
         src_reg channel_mask(this, glsl_type::uint_type);
         inst = emit(SHL(dst_reg(channel_mask), one, channel));
         inst->force_writemask_all = true;
         emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
                                               channel_mask);
         emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
      }

      /* Store the control data bits in the message payload and send it. */
      dst_reg mrf_reg2(MRF, base_mrf + 1);
      inst = emit(MOV(mrf_reg2, this->control_data_bits));
      inst->force_writemask_all = true;
      inst = emit(GS_OPCODE_URB_WRITE);
      inst->urb_write_flags = urb_write_flags;
      /* We need to increment Global Offset by 256-bits to make room for
       * Broadwell's extra "Vertex Count" payload at the beginning of the
       * URB entry.  Since this is an OWord message, Global Offset is counted
       * in 128-bit units, so we must set it to 2.
       */
      if (brw->gen >= 8)
         inst->offset = 2;
      inst->base_mrf = base_mrf;
      inst->mlen = 2;
   }
   emit(BRW_OPCODE_ENDIF);
}