示例#1
0
static void brw_wm_xy(struct brw_compile *p, int dw)
{
	struct brw_reg r1 = brw_vec1_grf(1, 0);
	struct brw_reg r1_uw = __retype_uw(r1);
	struct brw_reg x_uw, y_uw;

	brw_set_compression_control(p, BRW_COMPRESSION_NONE);

	if (dw == 16) {
		x_uw = brw_uw16_grf(30, 0);
		y_uw = brw_uw16_grf(28, 0);
	} else {
		x_uw = brw_uw8_grf(30, 0);
		y_uw = brw_uw8_grf(28, 0);
	}

	brw_ADD(p,
		x_uw,
		__stride(__suboffset(r1_uw, 4), 2, 4, 0),
		brw_imm_v(0x10101010));
	brw_ADD(p,
		y_uw,
		__stride(__suboffset(r1_uw, 5), 2, 4, 0),
		brw_imm_v(0x11001100));

	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

	brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1));
	brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
}
static void emit_pixel_xy(struct brw_compile *p,
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0)
{
   struct brw_reg r1 = brw_vec1_grf(1, 0);
   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
	      stride(suboffset(r1_uw, 4), 2, 4, 0),
	      brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
	      stride(suboffset(r1_uw,5), 2, 4, 0),
	      brw_imm_v(0x11001100));
   }

   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
static void emit_pixel_xy(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_reg r1 = brw_vec1_grf(1, 0);
    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);

    struct brw_reg dst0, dst1;
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    dst0 = get_dst_reg(c, inst, 0, 1);
    dst1 = get_dst_reg(c, inst, 1, 1);
    /* Calculate pixel centers by adding 1 or 0 to each of the
     * micro-tile coordinates passed in r1.
     */
    if (mask & WRITEMASK_X) {
	brw_ADD(p,
		vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
		stride(suboffset(r1_uw, 4), 2, 4, 0),
		brw_imm_v(0x10101010));
    }

    if (mask & WRITEMASK_Y) {
	brw_ADD(p,
		vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
		stride(suboffset(r1_uw, 5), 2, 4, 0),
		brw_imm_v(0x11001100));
    }

}
示例#4
0
/**
 * Computes the screen-space x,y position of the pixels.
 *
 * This will be used by emit_delta_xy() or emit_wpos_xy() for
 * interpolation of attributes..
 *
 * Payload R0:
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 * R0.1..8 -- ?
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 * R1.6 -- ?
 * R1.7 -- ?
 * R1.8 -- ?
 */
void emit_pixel_xy(struct brw_wm_compile *c,
		   const struct brw_reg *dst,
		   GLuint mask)
{
   struct brw_compile *p = &c->func;
   struct brw_reg r1 = brw_vec1_grf(1, 0);
   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
   struct brw_reg dst0_uw, dst1_uw;

   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (c->dispatch_width == 16) {
      dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
      dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
   } else {
      dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
      dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
   }

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      dst0_uw,
	      stride(suboffset(r1_uw, 4), 2, 4, 0),
	      brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      dst1_uw,
	      stride(suboffset(r1_uw,5), 2, 4, 0),
	      brw_imm_v(0x11001100));
   }
   brw_pop_insn_state(p);
}
示例#5
0
/**
 * Computes the screen-space x,y distance of the pixels from the start
 * vertex.
 *
 * This will be used in linterp or pinterp with the start vertex value
 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
 * to produce interpolated attribute values.
 */
void emit_delta_xy(struct brw_compile *p,
		   const struct brw_reg *dst,
		   GLuint mask,
		   const struct brw_reg *arg0)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg r1 = brw_vec1_grf(1, 0);

   if (mask == 0)
      return;

   assert(mask == WRITEMASK_XY);

   if (intel->gen >= 6) {
       /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1.
	  Just add them with 0.0 for dst reg.. */
       r1 = brw_imm_v(0x00000000);
       brw_ADD(p,
	       dst[0],
	       retype(arg0[0], BRW_REGISTER_TYPE_UW),
	       r1);
       brw_ADD(p,
	       dst[1],
	       retype(arg0[1], BRW_REGISTER_TYPE_UW),
	       r1);
       return;
   }

   /* Calc delta X,Y by subtracting origin in r1 from the pixel
    * centers produced by emit_pixel_xy().
    */
   brw_ADD(p,
	   dst[0],
	   retype(arg0[0], BRW_REGISTER_TYPE_UW),
	   negate(r1));
   brw_ADD(p,
	   dst[1],
	   retype(arg0[1], BRW_REGISTER_TYPE_UW),
	   negate(suboffset(r1,1)));
}
示例#6
0
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 */
void
gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_compile *p = &c->func;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_gs_alloc_regs(c, num_verts, true);
   brw_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         brw_MOV(p, destination_indices_uw,
                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                         : 0x00020001)); /* (1, 0, 2) */
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      }
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));

      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            brw_set_access_mode(p, BRW_ALIGN_16);
            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_set_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple “mov” instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_gs_ff_sync(c, 1);

   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
    * release the URB that was just allocated, and terminate the thread.
    */
   if (key->rasterizer_discard) {
      brw_gs_terminate(c);
      return;
   }

   brw_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
示例#7
0
void
gen6_gs_visitor::xfb_write()
{
   unsigned num_verts;
   struct brw_gs_prog_data *prog_data =
      (struct brw_gs_prog_data *) &c->prog_data;

   if (!prog_data->num_transform_feedback_bindings)
      return;

   switch (c->prog_data.output_topology) {
   case _3DPRIM_POINTLIST:
      num_verts = 1;
      break;
   case _3DPRIM_LINELIST:
   case _3DPRIM_LINESTRIP:
   case _3DPRIM_LINELOOP:
      num_verts = 2;
      break;
   case _3DPRIM_TRILIST:
   case _3DPRIM_TRIFAN:
   case _3DPRIM_TRISTRIP:
   case _3DPRIM_RECTLIST:
      num_verts = 3;
      break;
   case _3DPRIM_QUADLIST:
   case _3DPRIM_QUADSTRIP:
   case _3DPRIM_POLYGON:
      num_verts = 3;
      break;
   default:
      unreachable("Unexpected primitive type in Gen6 SOL program.");
   }

   this->current_annotation = "gen6 thread end: svb writes init";

   emit(MOV(dst_reg(this->vertex_output_offset), 0u));
   emit(MOV(dst_reg(this->sol_prim_written), 0u));

   /* Check that at least one primitive can be written
    *
    * Note: since we use the binding table to keep track of buffer offsets
    * and stride, the GS doesn't need to keep track of a separate pointer
    * into each buffer; it uses a single pointer which increments by 1 for
    * each vertex.  So we use SVBI0 for this pointer, regardless of whether
    * transform feedback is in interleaved or separate attribs mode.
    */
   src_reg sol_temp(this, glsl_type::uvec4_type);
   emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts)));

   /* Compare SVBI calculated number with the maximum value, which is
    * in R1.4 (previously saved in this->max_svbi) for gen6.
    */
   emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
   emit(IF(BRW_PREDICATE_NORMAL));
   {
      src_reg destination_indices_uw =
         retype(destination_indices, BRW_REGISTER_TYPE_UW);

      vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw),
                                        brw_imm_v(0x00020100))); /* (0, 1, 2) */
      inst->force_writemask_all = true;

      emit(ADD(dst_reg(this->destination_indices),
               this->destination_indices,
               this->svbi));
   }
   emit(BRW_OPCODE_ENDIF);

   /* Write transform feedback data for all processed vertices. */
   for (int i = 0; i < c->gp->program.VerticesOut; i++) {
      emit(MOV(dst_reg(sol_temp), i));
      emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
               BRW_CONDITIONAL_L));
      emit(IF(BRW_PREDICATE_NORMAL));
      {
         xfb_program(i, num_verts);
      }
      emit(BRW_OPCODE_ENDIF);
   }
}