Ejemplo n.º 1
1
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            break;
         }
      }

      if (!is_64bit) {
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
Ejemplo n.º 2
0
void
vec4_gs_visitor::visit(ir_emit_vertex *ir)
{
   this->current_annotation = "emit vertex: safety check";

   /* To ensure that we don't output more vertices than the shader specified
    * using max_vertices, do the logic inside a conditional of the form "if
    * (vertex_count < MAX)"
    */
   unsigned num_output_vertices = c->gp->program.VerticesOut;
   emit(CMP(dst_null_d(), this->vertex_count,
            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      /* If we're outputting 32 control data bits or less, then we can wait
       * until the shader is over to output them all.  Otherwise we need to
       * output them as we go.  Now is the time to do it, since we're about to
       * output the vertex_count'th vertex, so it's guaranteed that the
       * control data bits associated with the (vertex_count - 1)th vertex are
       * correct.
       */
      if (c->control_data_header_size_bits > 32) {
         this->current_annotation = "emit vertex: emit control data bits";
         /* Only emit control data bits if we've finished accumulating a batch
          * of 32 bits.  This is the case when:
          *
          *     (vertex_count * bits_per_vertex) % 32 == 0
          *
          * (in other words, when the last 5 bits of vertex_count *
          * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
          * integer n (which is always the case, since bits_per_vertex is
          * always 1 or 2), this is equivalent to requiring that the last 5-n
          * bits of vertex_count are 0:
          *
          *     vertex_count & (2^(5-n) - 1) == 0
          *
          * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
          * equivalent to:
          *
          *     vertex_count & (32 / bits_per_vertex - 1) == 0
          */
         vec4_instruction *inst =
            emit(AND(dst_null_d(), this->vertex_count,
                     (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         emit(IF(BRW_PREDICATE_NORMAL));
         {
            emit_control_data_bits();

            /* Reset control_data_bits to 0 so we can start accumulating a new
             * batch.
             *
             * Note: in the case where vertex_count == 0, this neutralizes the
             * effect of any call to EndPrimitive() that the shader may have
             * made before outputting its first vertex.
             */
            inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
            inst->force_writemask_all = true;
         }
         emit(BRW_OPCODE_ENDIF);
      }

      this->current_annotation = "emit vertex: vertex data";
      emit_vertex();

      /* In stream mode we have to set control data bits for all vertices
       * unless we have disabled control data bits completely (which we do
       * do for GL_POINTS outputs that don't use streams).
       */
      if (c->control_data_header_size_bits > 0 &&
          c->prog_data.control_data_format ==
             GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
          this->current_annotation = "emit vertex: Stream control data bits";
          set_stream_control_data_bits(ir->stream_id());
      }

      this->current_annotation = "emit vertex: increment vertex count";
      emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
               src_reg(1u)));
   }
   emit(BRW_OPCODE_ENDIF);

   this->current_annotation = NULL;
}
Ejemplo n.º 3
0
void
vec4_gs_visitor::emit_prolog()
{
   /* In vertex shaders, r0.2 is guaranteed to be initialized to zero.  In
    * geometry shaders, it isn't (it contains a bunch of information we don't
    * need, like the input primitive type).  We need r0.2 to be zero in order
    * to build scratch read/write messages correctly (otherwise this value
    * will be interpreted as a global offset, causing us to do our scratch
    * reads/writes to garbage memory).  So just set it to zero at the top of
    * the shader.
    */
   this->current_annotation = "clear r0.2";
   dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2_IMMED, r0, 0u);
   inst->force_writemask_all = true;

   /* Create a virtual register to hold the vertex count */
   this->vertex_count = src_reg(this, glsl_type::uint_type);

   /* Initialize the vertex_count register to 0 */
   this->current_annotation = "initialize vertex_count";
   inst = emit(MOV(dst_reg(this->vertex_count), 0u));
   inst->force_writemask_all = true;

   if (c->control_data_header_size_bits > 0) {
      /* Create a virtual register to hold the current set of control data
       * bits.
       */
      this->control_data_bits = src_reg(this, glsl_type::uint_type);

      /* If we're outputting more than 32 control data bits, then EmitVertex()
       * will set control_data_bits to 0 after emitting the first vertex.
       * Otherwise, we need to initialize it to 0 here.
       */
      if (c->control_data_header_size_bits <= 32) {
         this->current_annotation = "initialize control data bits";
         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
         inst->force_writemask_all = true;
      }
   }

   /* If the geometry shader uses the gl_PointSize input, we need to fix it up
    * to account for the fact that the vertex shader stored it in the w
    * component of VARYING_SLOT_PSIZ.
    */
   if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) {
      this->current_annotation = "swizzle gl_PointSize input";
      for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) {
         dst_reg dst(ATTR,
                     BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
         dst.type = BRW_REGISTER_TYPE_F;
         src_reg src(dst);
         dst.writemask = WRITEMASK_X;
         src.swizzle = BRW_SWIZZLE_WWWW;
         inst = emit(MOV(dst, src));

         /* In dual instanced dispatch mode, dst has a width of 4, so we need
          * to make sure the MOV happens regardless of which channels are
          * enabled.
          */
         inst->force_writemask_all = true;
      }
   }

   this->current_annotation = NULL;
}
Ejemplo n.º 4
0
void
vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   dst_reg dest;
   src_reg src;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input: {
      /* The EmitNoIndirectInput flag guarantees our vertex index will
       * be constant.  We should handle indirects someday.
       */
      nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
      nir_const_value *offset = nir_src_as_const_value(instr->src[1]);

      /* Make up a type...we have no way of knowing... */
      const glsl_type *const type = glsl_type::ivec(instr->num_components);

      src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] +
                          instr->const_index[0] + offset->u[0],
                    type);
      dest = get_nir_dest(instr->dest, src.type);
      dest.writemask = brw_writemask_for_size(instr->num_components);
      emit(MOV(dest, src));
      break;
   }

   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should have produced per_vertex intrinsics");

   case nir_intrinsic_emit_vertex_with_counter: {
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      int stream_id = instr->const_index[0];
      gs_emit_vertex(stream_id);
      break;
   }

   case nir_intrinsic_end_primitive_with_counter:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      gs_end_primitive();
      break;

   case nir_intrinsic_set_vertex_count:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      break;

   case nir_intrinsic_load_primitive_id:
      assert(gs_prog_data->include_primitive_id);
      dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
      break;

   case nir_intrinsic_load_invocation_id: {
      src_reg invocation_id =
         src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
      assert(invocation_id.file != BAD_FILE);
      dest = get_nir_dest(instr->dest, invocation_id.type);
      emit(MOV(dest, invocation_id));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
Ejemplo n.º 5
0
void
vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_invocation_id:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD),
               invocation_id));
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TCS_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
               brw_imm_d(key->input_vertices)));
      break;
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
      src_reg vertex_index =
         vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0]))
                      : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      emit_input_urb_read(dst, vertex_index, imm_offset,
                          nir_intrinsic_component(instr), indirect_offset);
      break;
   }
   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;

         /* This is a read of gl_TessLevelInner[], which lives in the
          * Patch URB header.  The layout depends on the domain.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS: {
            /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
            dst_reg tmp(this, glsl_type::vec4_type);
            emit_output_urb_read(tmp, 0, 0, src_reg());
            emit(MOV(writemask(dst, WRITEMASK_XY),
                     swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
            break;
         }
         case GL_TRIANGLES:
            /* DWord 4; use offset 1 but normal swizzle/writemask. */
            emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0,
                                 src_reg());
            break;
         case GL_ISOLINES:
            /* All channels are undefined. */
            return;
         default:
            unreachable("Bogus tessellation domain");
         }
      } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;
         unsigned swiz = BRW_SWIZZLE_WZYX;

         /* This is a read of gl_TessLevelOuter[], which lives in the
          * high 4 DWords of the Patch URB header, in reverse order.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS:
            dst.writemask = WRITEMASK_XYZW;
            break;
         case GL_TRIANGLES:
            dst.writemask = WRITEMASK_XYZ;
            break;
         case GL_ISOLINES:
            /* Isolines are not reversed; swizzle .zw -> .xy */
            swiz = BRW_SWIZZLE_ZWZW;
            dst.writemask = WRITEMASK_XY;
            return;
         default:
            unreachable("Bogus tessellation domain");
         }

         dst_reg tmp(this, glsl_type::vec4_type);
         emit_output_urb_read(tmp, 1, 0, src_reg());
         emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
      } else {
         emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
                              indirect_offset);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      src_reg value = get_nir_src(instr->src[0]);
      unsigned mask = instr->const_index[1];
      unsigned swiz = BRW_SWIZZLE_XYZW;

      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      /* The passthrough shader writes the whole patch header as two vec4s;
       * skip all the gl_TessLevelInner/Outer swizzling.
       */
      if (indirect_offset.file == BAD_FILE && !is_passthrough_shader) {
         if (imm_offset == 0) {
            value.type = BRW_REGISTER_TYPE_F;

            mask &=
               (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;

            /* This is a write to gl_TessLevelInner[], which lives in the
             * Patch URB header.  The layout depends on the domain.
             */
            switch (key->tes_primitive_mode) {
            case GL_QUADS:
               /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
                * We use an XXYX swizzle to reverse put .xy in the .wz
                * channels, and use a .zw writemask.
                */
               swiz = BRW_SWIZZLE4(0, 0, 1, 0);
               mask = writemask_for_backwards_vector(mask);
               break;
            case GL_TRIANGLES:
               /* gl_TessLevelInner[].x lives at DWord 4, so we set the
                * writemask to X and bump the URB offset by 1.
                */
               imm_offset = 1;
               break;
            case GL_ISOLINES:
               /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
               return;
            default:
               unreachable("Bogus tessellation domain");
            }
         } else if (imm_offset == 1) {
            value.type = BRW_REGISTER_TYPE_F;

            mask &=
               (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;

            /* This is a write to gl_TessLevelOuter[] which lives in the
             * Patch URB Header at DWords 4-7.  However, it's reversed, so
             * instead of .xyzw we have .wzyx.
             */
            if (key->tes_primitive_mode == GL_ISOLINES) {
               /* Isolines .xy should be stored in .zw, in order. */
               swiz = BRW_SWIZZLE4(0, 0, 0, 1);
               mask <<= 2;
            } else {
               /* Other domains are reversed; store .wzyx instead of .xyzw. */
               swiz = BRW_SWIZZLE_WZYX;
               mask = writemask_for_backwards_vector(mask);
            }
         }
      }

      unsigned first_component = nir_intrinsic_component(instr);
      if (first_component) {
         assert(swiz == BRW_SWIZZLE_XYZW);
         swiz = BRW_SWZ_COMP_OUTPUT(first_component);
         mask = mask << first_component;
      }

      emit_urb_write(swizzle(value, swiz), mask,
                     imm_offset, indirect_offset);
      break;
   }

   case nir_intrinsic_barrier: {
      dst_reg header = dst_reg(this, glsl_type::uvec4_type);
      emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
      emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
Ejemplo n.º 6
0
static struct prog_src_register src_undef( void )
{
   return src_reg(PROGRAM_UNDEFINED, 0);
}
Ejemplo n.º 7
0
static void emit_interp( struct brw_wm_compile *c,
			 GLuint idx )
{
   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
   struct prog_src_register deltas = get_delta_xy(c);
   struct prog_src_register arg2;
   GLuint opcode;
   
   /* Need to use PINTERP on attributes which have been
    * multiplied by 1/W in the SF program, and LINTERP on those
    * which have not:
    */
   switch (idx) {
   case FRAG_ATTRIB_WPOS:
      opcode = WM_LINTERP;
      arg2 = src_undef();

      /* Have to treat wpos.xy specially:
       */
      emit_op(c,
	      WM_WPOSXY,
	      dst_mask(dst, WRITEMASK_XY),
	      0, 0, 0,
	      get_pixel_xy(c),
	      src_undef(),
	      src_undef());
      
      dst = dst_mask(dst, WRITEMASK_ZW);

      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
       */
      emit_op(c,
	      WM_LINTERP,
	      dst,
	      0, 0, 0,
	      interp,
	      deltas,
	      arg2);
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      if (c->key.flat_shade) {
	 emit_op(c,
		 WM_CINTERP,
		 dst,
		 0, 0, 0,
		 interp,
		 src_undef(),
		 src_undef());
      }
      else {
	 emit_op(c,
		 WM_LINTERP,
		 dst,
		 0, 0, 0,
		 interp,
		 deltas,
		 src_undef());
      }
      break;
   default:
      emit_op(c,
	      WM_PINTERP,
	      dst,
	      0, 0, 0,
	      interp,
	      deltas,
	      get_pixel_w(c));
      break;
   }

   c->fp_interp_emitted |= 1<<idx;
}
Ejemplo n.º 8
0
static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
{
   return src_reg(dst.File, dst.Index);
}
Ejemplo n.º 9
0
void
gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
{
   struct brw_gs_prog_data *prog_data =
      (struct brw_gs_prog_data *) &c->prog_data;
   unsigned binding;
   unsigned num_bindings = prog_data->num_transform_feedback_bindings;
   src_reg sol_temp(this, glsl_type::uvec4_type);

   /* Check for buffer overflow: we need room to write the complete primitive
    * (all vertices). Otherwise, avoid writing any vertices for it
    */
   emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u));
   emit(MUL(dst_reg(sol_temp), sol_temp, src_reg(num_verts)));
   emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
   emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      /* Avoid overwriting MRF 1 as it is used as URB write message header */
      dst_reg mrf_reg(MRF, 2);

      this->current_annotation = "gen6: emit SOL vertex data";
      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (binding = 0; binding < num_bindings; ++binding) {
         unsigned char varying =
            prog_data->transform_feedback_bindings[binding];

         /* Set up the correct destination index for this vertex */
         vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
                                       mrf_reg,
                                       this->destination_indices);
         inst->sol_vertex = vertex % num_verts;

         /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
          *
          *   "Prior to End of Thread with a URB_WRITE, the kernel must
          *   ensure that all writes are complete by sending the final
          *   write as a committed write."
          */
         bool final_write = binding == (unsigned) num_bindings - 1 &&
                            inst->sol_vertex == num_verts - 1;

         /* Compute offset of this varying for the current vertex
          * in vertex_output
          */
         this->current_annotation = output_reg_annotation[varying];
         src_reg data(this->vertex_output);
         data.reladdr = ralloc(mem_ctx, src_reg);
         int offset = get_vertex_output_offset_for_varying(vertex, varying);
         emit(MOV(dst_reg(this->vertex_output_offset), offset));
         memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
         data.type = output_reg[varying].type;

         /* PSIZ, LAYER and VIEWPORT are packed in different channels of the
          * same slot, so make sure we write the appropriate channel
          */
         if (varying == VARYING_SLOT_PSIZ)
            data.swizzle = BRW_SWIZZLE_WWWW;
         else if (varying == VARYING_SLOT_LAYER)
            data.swizzle = BRW_SWIZZLE_YYYY;
         else if (varying == VARYING_SLOT_VIEWPORT)
            data.swizzle = BRW_SWIZZLE_ZZZZ;
         else
            data.swizzle = prog_data->transform_feedback_swizzles[binding];

         /* Write data */
         inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
         inst->sol_binding = binding;
         inst->sol_final_write = final_write;

         if (final_write) {
            /* This is the last vertex of the primitive, then increment
             * SO num primitive counter and destination indices.
             */
            emit(ADD(dst_reg(this->destination_indices),
                     this->destination_indices,
                     src_reg(num_verts)));
            emit(ADD(dst_reg(this->sol_prim_written),
                     this->sol_prim_written, 1u));
         }

      }
      this->current_annotation = NULL;
   }
   emit(BRW_OPCODE_ENDIF);
}
Ejemplo n.º 10
0
void
gen6_gs_visitor::xfb_write()
{
   unsigned num_verts;
   struct brw_gs_prog_data *prog_data =
      (struct brw_gs_prog_data *) &c->prog_data;

   if (!prog_data->num_transform_feedback_bindings)
      return;

   switch (c->prog_data.output_topology) {
   case _3DPRIM_POINTLIST:
      num_verts = 1;
      break;
   case _3DPRIM_LINELIST:
   case _3DPRIM_LINESTRIP:
   case _3DPRIM_LINELOOP:
      num_verts = 2;
      break;
   case _3DPRIM_TRILIST:
   case _3DPRIM_TRIFAN:
   case _3DPRIM_TRISTRIP:
   case _3DPRIM_RECTLIST:
      num_verts = 3;
      break;
   case _3DPRIM_QUADLIST:
   case _3DPRIM_QUADSTRIP:
   case _3DPRIM_POLYGON:
      num_verts = 3;
      break;
   default:
      unreachable("Unexpected primitive type in Gen6 SOL program.");
   }

   this->current_annotation = "gen6 thread end: svb writes init";

   emit(MOV(dst_reg(this->vertex_output_offset), 0u));
   emit(MOV(dst_reg(this->sol_prim_written), 0u));

   /* Check that at least one primitive can be written
    *
    * Note: since we use the binding table to keep track of buffer offsets
    * and stride, the GS doesn't need to keep track of a separate pointer
    * into each buffer; it uses a single pointer which increments by 1 for
    * each vertex.  So we use SVBI0 for this pointer, regardless of whether
    * transform feedback is in interleaved or separate attribs mode.
    */
   src_reg sol_temp(this, glsl_type::uvec4_type);
   emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts)));

   /* Compare SVBI calculated number with the maximum value, which is
    * in R1.4 (previously saved in this->max_svbi) for gen6.
    */
   emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      src_reg destination_indices_uw =
         retype(destination_indices, BRW_REGISTER_TYPE_UW);

      vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw),
                                        brw_imm_v(0x00020100))); /* (0, 1, 2) */
      inst->force_writemask_all = true;

      emit(ADD(dst_reg(this->destination_indices),
               this->destination_indices,
               this->svbi));
   }
   emit(BRW_OPCODE_ENDIF);

   /* Write transform feedback data for all processed vertices. */
   for (int i = 0; i < c->gp->program.VerticesOut; i++) {
      emit(MOV(dst_reg(sol_temp), i));
      emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
               BRW_CONDITIONAL_L));
      emit(IF(BRW_PREDICATE_NORMAL));
      {
         xfb_program(i, num_verts);
      }
      emit(BRW_OPCODE_ENDIF);
   }
}
Ejemplo n.º 11
0
void
gen6_gs_visitor::emit_prolog()
{
   vec4_gs_visitor::emit_prolog();

   /* Gen6 geometry shaders require to allocate an initial VUE handle via
    * FF_SYNC message, however the documentation remarks that only one thread
    * can write to the URB simultaneously and the FF_SYNC message provides the
    * synchronization mechanism for this, so using this message effectively
    * stalls the thread until it is its turn to write to the URB. Because of
    * this, the best way to implement geometry shader algorithms in gen6 is to
    * execute the algorithm before the FF_SYNC message to maximize parallelism.
    *
    * To achieve this we buffer the geometry shader outputs for each emitted
    * vertex in vertex_output during operation. Then, when we have processed
    * the last vertex (that is, at thread end time), we send the FF_SYNC
    * message to allocate the initial VUE handle and write all buffered vertex
    * data to the URB in one go.
    *
    * For each emitted vertex, vertex_output will hold vue_map.num_slots
    * data items plus one additional item to hold required flags
    * (PrimType, PrimStart, PrimEnd, as expected by the URB_WRITE message)
    * which come right after the data items for that vertex. Vertex data and
    * flags for the next vertex come right after the data items and flags for
    * the previous vertex.
    */
   this->current_annotation = "gen6 prolog";
   this->vertex_output = src_reg(this,
                                 glsl_type::uint_type,
                                 (prog_data->vue_map.num_slots + 1) *
                                 c->gp->program.VerticesOut);
   this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
   emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));

   /* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES),
    * so initialize it once to R0.
    */
   vec4_instruction *inst = emit(MOV(dst_reg(MRF, 1),
                                     retype(brw_vec8_grf(0, 0),
                                            BRW_REGISTER_TYPE_UD)));
   inst->force_writemask_all = true;

   /* This will be used as a temporary to store writeback data of FF_SYNC
    * and URB_WRITE messages.
    */
   this->temp = src_reg(this, glsl_type::uint_type);

   /* This will be used to know when we are processing the first vertex of
    * a primitive. We will set this to URB_WRITE_PRIM_START only when we know
    * that we are processing the first vertex in the primitive and to zero
    * otherwise. This way we can use its value directly in the URB write
    * headers.
    */
   this->first_vertex = src_reg(this, glsl_type::uint_type);
   emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));

   /* The FF_SYNC message requires to know the number of primitives generated,
    * so keep a counter for this.
    */
   this->prim_count = src_reg(this, glsl_type::uint_type);
   emit(MOV(dst_reg(this->prim_count), 0u));

   if (c->prog_data.gen6_xfb_enabled) {
      /* Create a virtual register to hold destination indices in SOL */
      this->destination_indices = src_reg(this, glsl_type::uvec4_type);
      /* Create a virtual register to hold number of written primitives */
      this->sol_prim_written = src_reg(this, glsl_type::uint_type);
      /* Create a virtual register to hold Streamed Vertex Buffer Indices */
      this->svbi = src_reg(this, glsl_type::uvec4_type);
      /* Create a virtual register to hold max values of SVBI */
      this->max_svbi = src_reg(this, glsl_type::uvec4_type);
      emit(MOV(dst_reg(this->max_svbi),
               src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));

      xfb_setup();
   }

   /* PrimitveID is delivered in r0.1 of the thread payload. If the program
    * needs it we have to move it to a separate register where we can map
    * the atttribute.
    *
    * Notice that we cannot use a virtual register for this, because we need to
    * map all input attributes to hardware registers in setup_payload(),
    * which happens before virtual registers are mapped to hardware registers.
    * We could work around that issue if we were able to compute the first
    * non-payload register here and move the PrimitiveID information to that
    * register, but we can't because at this point we don't know the final
    * number uniforms that will be included in the payload.
    *
    * So, what we do is to place PrimitiveID information in r1, which is always
    * delivered as part of the payload, but its only populated with data
    * relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE
    * in the 3DSTATE_GS state packet. That information can be obtained by other
    * means though, so we can safely use r1 for this purpose.
    */
   if (c->prog_data.include_primitive_id) {
      this->primitive_id =
         src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
      emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
   }
}
Ejemplo n.º 12
0
void
gen6_gs_visitor::emit_thread_end()
{
   /* Make sure the current primitive is ended: we know it is not ended when
    * first_vertex is not zero. This is only relevant for outputs other than
    * points because in the point case we set PrimEnd on all vertices.
    */
   if (c->gp->program.OutputType != GL_POINTS) {
      emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
      emit(IF(BRW_PREDICATE_NORMAL));
      {
         visit((ir_end_primitive *) NULL);
      }
      emit(BRW_OPCODE_ENDIF);
   }

   /* Here we have to:
    * 1) Emit an FF_SYNC messsage to obtain an initial VUE handle.
    * 2) Loop over all buffered vertex data and write it to corresponding
    *    URB entries.
    * 3) Allocate new VUE handles for all vertices other than the first.
    * 4) Send a final EOT message.
    */

   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;

   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* Issue the FF_SYNC message and obtain the initial VUE handle. */
   emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      this->current_annotation = "gen6 thread end: ff_sync";

      vec4_instruction *inst;
      if (c->prog_data.gen6_xfb_enabled) {
         src_reg sol_temp(this, glsl_type::uvec4_type);
         emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
              dst_reg(this->svbi),
              this->vertex_count,
              this->prim_count,
              sol_temp);
         inst = emit(GS_OPCODE_FF_SYNC,
                     dst_reg(this->temp), this->prim_count, this->svbi);
      } else {
         inst = emit(GS_OPCODE_FF_SYNC,
                     dst_reg(this->temp), this->prim_count, src_reg(0u));
      }
      inst->base_mrf = base_mrf;

      /* Loop over all buffered vertices and emit URB write messages */
      this->current_annotation = "gen6 thread end: urb writes init";
      src_reg vertex(this, glsl_type::uint_type);
      emit(MOV(dst_reg(vertex), 0u));
      emit(MOV(dst_reg(this->vertex_output_offset), 0u));

      this->current_annotation = "gen6 thread end: urb writes";
      emit(BRW_OPCODE_DO);
      {
         emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
         inst = emit(BRW_OPCODE_BREAK);
         inst->predicate = BRW_PREDICATE_NORMAL;

         /* First we prepare the message header */
         emit_urb_write_header(base_mrf);

         /* Then add vertex data to the message in interleaved fashion */
         int slot = 0;
         bool complete = false;
         do {
            int mrf = base_mrf + 1;

            /* URB offset is in URB row increments, and each of our MRFs is half
             * of one of those, since we're doing interleaved writes.
             */
            int urb_offset = slot / 2;

            for (; slot < prog_data->vue_map.num_slots; ++slot) {
               int varying = prog_data->vue_map.slot_to_varying[slot];
               current_annotation = output_reg_annotation[varying];

               /* Compute offset of this slot for the current vertex
                * in vertex_output
                */
               src_reg data(this->vertex_output);
               data.reladdr = ralloc(mem_ctx, src_reg);
               memcpy(data.reladdr, &this->vertex_output_offset,
                      sizeof(src_reg));

               /* Copy this slot to the appropriate message register */
               dst_reg reg = dst_reg(MRF, mrf);
               reg.type = output_reg[varying].type;
               data.type = reg.type;
               vec4_instruction *inst = emit(MOV(reg, data));
               inst->force_writemask_all = true;

               mrf++;
               emit(ADD(dst_reg(this->vertex_output_offset),
                        this->vertex_output_offset, 1u));

               /* If this was max_usable_mrf, we can't fit anything more into
                * this URB WRITE.
                */
               if (mrf > max_usable_mrf) {
                  slot++;
                  break;
               }
            }

            complete = slot >= prog_data->vue_map.num_slots;
            emit_urb_write_opcode(complete, base_mrf, mrf, urb_offset);
         } while (!complete);

         /* Skip over the flags data item so that vertex_output_offset points
          * to the first data item of the next vertex, so that we can start
          * writing the next vertex.
          */
         emit(ADD(dst_reg(this->vertex_output_offset),
                  this->vertex_output_offset, 1u));

         emit(ADD(dst_reg(vertex), vertex, 1u));
      }
      emit(BRW_OPCODE_WHILE);

      if (c->prog_data.gen6_xfb_enabled)
         xfb_write();
   }
   emit(BRW_OPCODE_ENDIF);

   /* Finally, emit EOT message.
    *
    * In gen6 we need to end the thread differently depending on whether we have
    * emitted at least one vertex or not. In case we did, the EOT message must
    * always include the COMPLETE flag or else the GPU hangs. If we have not
    * produced any output we can't use the COMPLETE flag.
    *
    * However, this would lead us to end the program with an ENDIF opcode,
    * which we want to avoid, so what we do is that we always request a new
    * VUE handle every time we do a URB WRITE, even for the last vertex we emit.
    * With this we make sure that whether we have emitted at least one vertex
    * or none at all, we have to finish the thread without writing to the URB,
    * which works for both cases by setting the COMPLETE and UNUSED flags in
    * the EOT message.
    */
   this->current_annotation = "gen6 thread end: EOT";

   if (c->prog_data.gen6_xfb_enabled) {
      /* When emitting EOT, set SONumPrimsWritten Increment Value. */
      src_reg data(this, glsl_type::uint_type);
      emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
      emit(SHL(dst_reg(data), data, src_reg(16u)));
      emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
   }

   vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
   inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
   inst->base_mrf = base_mrf;
   inst->mlen = 1;
}
Ejemplo n.º 13
0
void
gen6_gs_visitor::visit(ir_emit_vertex *)
{
   this->current_annotation = "gen6 emit vertex";
   /* Honor max_vertex layout indication in geometry shader by ignoring any
    * vertices coming after c->gp->program.VerticesOut.
    */
   unsigned num_output_vertices = c->gp->program.VerticesOut;
   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
            BRW_CONDITIONAL_L));
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      /* Buffer all output slots for this vertex in vertex_output */
      for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
         int varying = prog_data->vue_map.slot_to_varying[slot];
         if (varying != VARYING_SLOT_PSIZ) {
            dst_reg dst(this->vertex_output);
            dst.reladdr = ralloc(mem_ctx, src_reg);
            memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
            emit_urb_slot(dst, varying);
         } else {
            /* The PSIZ slot can pack multiple varyings in different channels
             * and emit_urb_slot() will produce a MOV instruction for each of
             * them. Since we are writing to an array, that will translate to
             * possibly multiple MOV instructions with an array destination and
             * each will generate a scratch write with the same offset into
             * scratch space (thus, each one overwriting the previous). This is
             * not what we want. What we will do instead is emit PSIZ to a
             * a regular temporary register, then move that resgister into the
             * array. This way we only have one instruction with an array
             * destination and we only produce a single scratch write.
             */
            dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
            emit_urb_slot(tmp, varying);
            dst_reg dst(this->vertex_output);
            dst.reladdr = ralloc(mem_ctx, src_reg);
            memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
            vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
            inst->force_writemask_all = true;
         }

         emit(ADD(dst_reg(this->vertex_output_offset),
                  this->vertex_output_offset, 1u));
      }

      /* Now buffer flags for this vertex */
      dst_reg dst(this->vertex_output);
      dst.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
      if (c->gp->program.OutputType == GL_POINTS) {
         /* If we are outputting points, then every vertex has PrimStart and
          * PrimEnd set.
          */
         emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
                  URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
         emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
      } else {
         /* Otherwise, we can only set the PrimStart flag, which we have stored
          * in the first_vertex register. We will have to wait until we execute
          * EndPrimitive() or we end the thread to set the PrimEnd flag on a
          * vertex.
          */
         emit(OR(dst, this->first_vertex,
                 (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
         emit(MOV(dst_reg(this->first_vertex), 0u));
      }
      emit(ADD(dst_reg(this->vertex_output_offset),
               this->vertex_output_offset, 1u));

      /* Update vertex count */
      emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
   }
   emit(BRW_OPCODE_ENDIF);
}