Beispiel #1
0
static bool
run_tests(struct brw_context *brw)
{
   bool fail = false;

   for (int i = 0; i < ARRAY_SIZE(tests); i++) {
      for (int align_16 = 0; align_16 <= 1; align_16++) {
	 struct brw_compile *p = rzalloc(NULL, struct brw_compile);
	 brw_init_compile(brw, p, p);

	 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
	 if (align_16)
	    brw_set_default_access_mode(p, BRW_ALIGN_16);
	 else
	    brw_set_default_access_mode(p, BRW_ALIGN_1);

	 tests[i].func(p);
	 assert(p->nr_insn == 1);

	 if (!test_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 if (!test_fuzz_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 ralloc_free(p);
      }
   }

   return fail;
}
Beispiel #2
0
void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
   /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
    * "Message Payload":
    *
    * "Operand0[7].  For the INT DIV functions, this operand is the
    *  denominator."
    *  ...
    * "Operand1[7].  For the INT DIV functions, this operand is the
    *  numerator."
    */
   bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
   struct brw_reg &op0 = is_int_div ? src1 : src0;
   struct brw_reg &op1 = is_int_div ? src0 : src1;

   brw_push_insn_state(p);
   brw_set_default_saturate(p, false);
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
   brw_pop_insn_state(p);

   gen4_math(p,
	     dst,
	     brw_math_function(inst->opcode),
	     inst->base_mrf,
	     op0,
	     BRW_MATH_DATA_VECTOR,
	     BRW_MATH_PRECISION_FULL);
}
Beispiel #3
0
static void
set_predicate_control_flag_value(struct brw_compile *p,
                                 struct brw_sf_compile *c,
                                 unsigned value)
{
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

   if (value != 0xff) {
      if (value != c->flag_value) {
         brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
         c->flag_value = value;
      }

      brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
   }
}
Beispiel #4
0
/* Points setup - several simplifications as all attributes are
 * constant across the face of the point (point sprites excluded!)
 */
void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);

   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */

   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {				
	 /* This seems odd as the values are all constant, but the
	  * fragment shader will be expecting it:
	  */
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }


      /* The delta values are always zero, just send the starting
       * coordinate.  Again, this is to fit in with the interpolation
       * code in the fragment shader.
       */
      {
	 set_predicate_control_flag_value(p, c, pc);

	 brw_MOV(p, c->m3C0, a0); /* constant value */

	 /* Copy m0..m3 to URB.
	  */
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* urb destination offset */
		       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
static void
gen_f0_0_MOV_GRF_GRF(struct brw_codegen *p)
{
   struct brw_reg g0 = brw_vec8_grf(0, 0);
   struct brw_reg g2 = brw_vec8_grf(2, 0);

   brw_push_insn_state(p);
   brw_set_default_predicate_control(p, true);
   brw_MOV(p, g0, g2);
   brw_pop_insn_state(p);
}
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7.  Explicitly test
 * it, so that we run the fuzzing can run over all the other bits that might
 * interact with it.
 */
static void
gen_f0_1_MOV_GRF_GRF(struct brw_codegen *p)
{
   struct brw_reg g0 = brw_vec8_grf(0, 0);
   struct brw_reg g2 = brw_vec8_grf(2, 0);

   brw_push_insn_state(p);
   brw_set_default_predicate_control(p, true);
   brw_inst *mov = brw_MOV(p, g0, g2);
   brw_inst_set_flag_subreg_nr(p->devinfo, mov, 1);
   brw_pop_insn_state(p);
}
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    struct brw_codegen *p = &c->func;

    if (p->devinfo->gen == 5) {
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
        brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p,
			c->reg.R0,
			0,
			c->reg.R0,
			1, /* allocate */
			1, /* response length */
			0 /* eot */);
        }
        brw_ENDIF(p);
        brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
Beispiel #8
0
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    struct brw_compile *p = &c->func;
    struct brw_context *brw = p->brw;

    if (brw->gen == 5) {
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        brw_last_inst->header.destreg__conditionalmod = BRW_CONDITIONAL_Z;
        brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p,
			c->reg.R0,
			0,
			c->reg.R0,
			1, /* allocate */
			1, /* response length */
			0 /* eot */);
        }
        brw_ENDIF(p);
        brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
static bool
run_tests(const struct gen_device_info *devinfo)
{
   brw_init_compaction_tables(devinfo);
   bool fail = false;

   for (unsigned i = 0; i < ARRAY_SIZE(tests); i++) {
      for (int align_16 = 0; align_16 <= 1; align_16++) {
	 struct brw_codegen *p = rzalloc(NULL, struct brw_codegen);
	 brw_init_codegen(devinfo, p, p);

	 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
	 if (align_16)
	    brw_set_default_access_mode(p, BRW_ALIGN_16);
	 else
	    brw_set_default_access_mode(p, BRW_ALIGN_1);

	 tests[i].func(p);
	 assert(p->nr_insn == 1);

	 if (!test_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 if (!test_fuzz_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 ralloc_free(p);
      }
   }

   return fail;
}
Beispiel #10
0
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 */
void
gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_codegen *p = &c->func;
   brw_inst *inst;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_ff_gs_alloc_regs(c, num_verts, true);
   brw_ff_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         inst = brw_MOV(p, destination_indices_uw,
                        brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                                : 0x00020001)); /* (1, 0, 2) */
         brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL);
      }

      assert(c->reg.destination_indices.width == BRW_EXECUTE_4);
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_4);
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
      brw_pop_insn_state(p);
      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            brw_set_default_access_mode(p, BRW_ALIGN_16);
            brw_push_insn_state(p);
            brw_set_default_exec_size(p, BRW_EXECUTE_4);

            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_pop_insn_state(p);

            brw_set_default_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_ff_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple “mov” instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_ff_gs_ff_sync(c, 1);

   brw_ff_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_ff_gs_offset_header_dw2(c,
                                  URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_ff_gs_offset_header_dw2(c,
                                  URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
         brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_ff_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
Beispiel #11
0
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      pc_coord_replace = calculate_point_sprite_mask(c, i);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc_coord_replace);
	 /* Caculate 1.0/PointWidth */
	 gen4_math(&c->func,
		   c->tmp,
		   BRW_MATH_FUNCTION_INV,
		   0,
		   c->dx0,
		   BRW_MATH_DATA_SCALAR,
		   BRW_MATH_PRECISION_FULL);

	 brw_set_default_access_mode(p, BRW_ALIGN_16);

	 /* dA/dx, dA/dy */
	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
	 } else {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
	 }

	 /* attribute constant offset */
	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
	 } else {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
	 }

	 brw_set_default_access_mode(p, BRW_ALIGN_1);
      }

      if (pc & ~pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
	 brw_MOV(p, c->m3C0, a0); /* constant value */
      }


      set_predicate_control_flag_value(p, c, pc);
      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
		    brw_null_reg(),
		    0,
		    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                    : BRW_URB_WRITE_NO_FLAGS,
		    4, 	/* msg len */
		    0,	/* response len */
		    i*4,	/* urb destination offset */
		    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
Beispiel #12
0
void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->has_flat_shading)
      do_flatshade_line(c);

   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color:
       */
      if (pc_linear) {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));

 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
		
	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);

	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.
	  */
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* urb destination offset */
		       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
Beispiel #13
0
void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 3;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.do_twoside_color)
      do_twoside_color(c);

   if (c->has_flat_shading)
      do_flatshade_triangle(c);


   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      struct brw_reg a2 = offset(c->vert[2], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
	 brw_MUL(p, a2, a2, c->inv_w[2]);
      }


      /* Calculate coefficients for interpolated values:
       */
      if (pc_linear)
      {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));

	 /* calculate dA/dx
	  */
	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
		
	 /* calculate dA/dy
	  */
	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);
	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
	  * the send instruction:
	  */	
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* offset */
		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
      }
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
Beispiel #14
0
static void brw_clip_test( struct brw_clip_compile *c )
{
    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);

    struct brw_reg v0 = get_tmp(c);
    struct brw_reg v1 = get_tmp(c);
    struct brw_reg v2 = get_tmp(c);

    struct brw_indirect vt0 = brw_indirect(0, 0);
    struct brw_indirect vt1 = brw_indirect(1, 0);
    struct brw_indirect vt2 = brw_indirect(2, 0);

    struct brw_codegen *p = &c->func;
    struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */

    GLuint hpos_offset = brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_POS);

    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
    brw_MOV(p, v0, deref_4f(vt0, hpos_offset));
    brw_MOV(p, v1, deref_4f(vt1, hpos_offset));
    brw_MOV(p, v2, deref_4f(vt2, hpos_offset));
    brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));

    /* test nearz, xmin, ymin plane */
    /* clip.xyz < -clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
    brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p);
    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside,need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

    /* test farz, xmax, ymax plane */
    /* clip.xyz > clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
    brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
    brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
    brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p);
    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside,need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

    release_tmps(c);
}
Beispiel #15
0
/* Use mesa's clipping algorithms, translated to GEN4 assembly.
 */
void brw_clip_tri( struct brw_clip_compile *c )
{
   struct brw_codegen *p = &c->func;
   struct brw_indirect vtx = brw_indirect(0, 0);
   struct brw_indirect vtxPrev = brw_indirect(1, 0);
   struct brw_indirect vtxOut = brw_indirect(2, 0);
   struct brw_indirect plane_ptr = brw_indirect(3, 0);
   struct brw_indirect inlist_ptr = brw_indirect(4, 0);
   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
   GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
   GLint clipdist0_offset = c->key.nr_userclip
      ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0)
      : 0;

   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );
   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist));
   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));

   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );

   /* Set the initial vertex source mask: The first 6 planes are the bounds
    * of the view volume; the next 8 planes are the user clipping planes.
    */
   brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0));

   /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0].
    * We'll increment 6 times before we start hitting actual user clipping. */
   brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float)));

   brw_DO(p, BRW_EXECUTE_1);
   {
      /* if (planemask & 1)
       */
      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

      brw_IF(p, BRW_EXECUTE_1);
      {
	 /* vtxOut = freelist_ptr++
	  */
	 brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) );
	 brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));

	 if (c->key.nr_userclip)
	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
	 else
	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));

	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));

	 brw_DO(p, BRW_EXECUTE_1);
	 {
	    /* vtx = *input_ptr;
	     */
	    brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));

            load_clip_distance(c, vtxPrev, c->reg.dpPrev, hpos_offset, BRW_CONDITIONAL_L);
	    /* (prev < 0.0f) */
	    brw_IF(p, BRW_EXECUTE_1);
	    {
               load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_GE);
	       /* IS_POSITIVE(next)
		*/
	       brw_IF(p, BRW_EXECUTE_1);
	       {

		  /* Coming back in.
		   */
		  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
		  brw_math_invert(p, c->reg.t, c->reg.t);
		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);

		  /* If (vtxOut == 0) vtxOut = vtxPrev
		   */
		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
                  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev));
                  brw_inst_set_pred_control(p->devinfo, brw_last_inst,
                                            BRW_PREDICATE_NORMAL);

		  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, false);

		  /* *outlist_ptr++ = vtxOut;
		   * nr_verts++;
		   * vtxOut = 0;
		   */
		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
	       }
	       brw_ENDIF(p);

	    }
	    brw_ELSE(p);
	    {
	       /* *outlist_ptr++ = vtxPrev;
		* nr_verts++;
		*/
	       brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
	       brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
	       brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));

               load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_L);
	       /* (next < 0.0f)
		*/
	       brw_IF(p, BRW_EXECUTE_1);
	       {
		  /* Going out of bounds.  Avoid division by zero as we
		   * know dp != dpPrev from DIFFERENT_SIGNS, above.
		   */
		  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
		  brw_math_invert(p, c->reg.t, c->reg.t);
		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);

		  /* If (vtxOut == 0) vtxOut = vtx
		   */
		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
                  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx));
                  brw_inst_set_pred_control(p->devinfo, brw_last_inst,
                                            BRW_PREDICATE_NORMAL);

		  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, true);

		  /* *outlist_ptr++ = vtxOut;
		   * nr_verts++;
		   * vtxOut = 0;
		   */
		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
	       }
	       brw_ENDIF(p);
	    }
	    brw_ENDIF(p);

	    /* vtxPrev = vtx;
	     * inlist_ptr++;
	     */
	    brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
	    brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));

	    /* while (--loopcount != 0)
	     */
	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
            brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
	 }
	 brw_WHILE(p);
         brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
	  * inlist = outlist
	  * inlist_ptr = &inlist[0]
	  * outlist_ptr = &outlist[0]
	  */
	 brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
	 brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
	 brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
      }
      brw_ENDIF(p);

      /* plane_ptr++;
       */
      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));

      /* nr_verts >= 3
       */
      brw_CMP(p,
	      vec1(brw_null_reg()),
	      BRW_CONDITIONAL_GE,
	      c->reg.nr_verts,
	      brw_imm_ud(3));
      brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);

      /* && (planemask>>=1) != 0
       */
      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
      brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1));
      brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float)));
   }
   brw_WHILE(p);
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
Beispiel #16
0
void
vec4_generator::generate_code(const cfg_t *cfg)
{
   struct annotation_info annotation;
   memset(&annotation, 0, sizeof(annotation));

   foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
      struct brw_reg src[3], dst;

      if (unlikely(debug_flag))
         annotate(brw, &annotation, cfg, inst, p->next_insn_offset);

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = inst->get_src(this->prog_data, i);
      }
      dst = inst->get_dst();

      brw_set_default_predicate_control(p, inst->predicate);
      brw_set_default_predicate_inverse(p, inst->predicate_inverse);
      brw_set_default_saturate(p, inst->saturate);
      brw_set_default_mask_control(p, inst->force_writemask_all);
      brw_set_default_acc_write_control(p, inst->writes_accumulator);

      unsigned pre_emit_nr_insn = p->nr_insn;

      generate_vec4_instruction(inst, dst, src);

      if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
         assert(p->nr_insn == pre_emit_nr_insn + 1 ||
                !"conditional_mod, no_dd_check, or no_dd_clear set for IR "
                 "emitting more than 1 instruction");

         brw_inst *last = &p->store[pre_emit_nr_insn];

         brw_inst_set_cond_modifier(brw, last, inst->conditional_mod);
         brw_inst_set_no_dd_clear(brw, last, inst->no_dd_clear);
         brw_inst_set_no_dd_check(brw, last, inst->no_dd_check);
      }
   }

   brw_set_uip_jip(p);
   annotation_finalize(&annotation, p->next_insn_offset);

   int before_size = p->next_insn_offset;
   brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann);
   int after_size = p->next_insn_offset;

   if (unlikely(debug_flag)) {
      if (shader_prog) {
         fprintf(stderr, "Native code for %s vertex shader %d:\n",
                 shader_prog->Label ? shader_prog->Label : "unnamed",
                 shader_prog->Name);
      } else {
         fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
      }
      fprintf(stderr, "vec4 shader: %d instructions. Compacted %d to %d"
                      " bytes (%.0f%%)\n",
              before_size / 16, before_size, after_size,
              100.0f * (before_size - after_size) / before_size);

      dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog);
      ralloc_free(annotation.ann);
   }
}
Beispiel #17
0
/**
 * Generate assembly for a Vec4 IR instruction.
 *
 * \param instruction The Vec4 IR instruction to generate code for.
 * \param dst         The destination register.
 * \param src         An array of up to three source registers.
 */
void
vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
                                          struct brw_reg dst,
                                          struct brw_reg *src)
{
   vec4_instruction *inst = (vec4_instruction *) instruction;

   if (dst.width == BRW_WIDTH_4) {
      /* This happens in attribute fixups for "dual instanced" geometry
       * shaders, since they use attributes that are vec4's.  Since the exec
       * width is only 4, it's essential that the caller set
       * force_writemask_all in order to make sure the instruction is executed
       * regardless of which channels are enabled.
       */
      assert(inst->force_writemask_all);

      /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
       * the following register region restrictions (from Graphics BSpec:
       * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
       * > Register Region Restrictions)
       *
       *     1. ExecSize must be greater than or equal to Width.
       *
       *     2. If ExecSize = Width and HorzStride != 0, VertStride must be set
       *        to Width * HorzStride."
       */
      for (int i = 0; i < 3; i++) {
         if (src[i].file == BRW_GENERAL_REGISTER_FILE)
            src[i] = stride(src[i], 4, 4, 1);
      }
   }

   switch (inst->opcode) {
   case BRW_OPCODE_MOV:
      brw_MOV(p, dst, src[0]);
      break;
   case BRW_OPCODE_ADD:
      brw_ADD(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_MUL:
      brw_MUL(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_MACH:
      brw_MACH(p, dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MAD:
      assert(brw->gen >= 6);
      brw_MAD(p, dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_FRC:
      brw_FRC(p, dst, src[0]);
      break;
   case BRW_OPCODE_RNDD:
      brw_RNDD(p, dst, src[0]);
      break;
   case BRW_OPCODE_RNDE:
      brw_RNDE(p, dst, src[0]);
      break;
   case BRW_OPCODE_RNDZ:
      brw_RNDZ(p, dst, src[0]);
      break;

   case BRW_OPCODE_AND:
      brw_AND(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_OR:
      brw_OR(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_XOR:
      brw_XOR(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_NOT:
      brw_NOT(p, dst, src[0]);
      break;
   case BRW_OPCODE_ASR:
      brw_ASR(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_SHR:
      brw_SHR(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_SHL:
      brw_SHL(p, dst, src[0], src[1]);
      break;

   case BRW_OPCODE_CMP:
      brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
      break;
   case BRW_OPCODE_SEL:
      brw_SEL(p, dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DPH:
      brw_DPH(p, dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP4:
      brw_DP4(p, dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP3:
      brw_DP3(p, dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP2:
      brw_DP2(p, dst, src[0], src[1]);
      break;

   case BRW_OPCODE_F32TO16:
      assert(brw->gen >= 7);
      brw_F32TO16(p, dst, src[0]);
      break;

   case BRW_OPCODE_F16TO32:
      assert(brw->gen >= 7);
      brw_F16TO32(p, dst, src[0]);
      break;

   case BRW_OPCODE_LRP:
      assert(brw->gen >= 6);
      brw_LRP(p, dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFREV:
      assert(brw->gen >= 7);
      /* BFREV only supports UD type for src and dst. */
      brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
                   retype(src[0], BRW_REGISTER_TYPE_UD));
      break;
   case BRW_OPCODE_FBH:
      assert(brw->gen >= 7);
      /* FBH only supports UD type for dst. */
      brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;
   case BRW_OPCODE_FBL:
      assert(brw->gen >= 7);
      /* FBL only supports UD type for dst. */
      brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;
   case BRW_OPCODE_CBIT:
      assert(brw->gen >= 7);
      /* CBIT only supports UD type for dst. */
      brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;
   case BRW_OPCODE_ADDC:
      assert(brw->gen >= 7);
      brw_ADDC(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_SUBB:
      assert(brw->gen >= 7);
      brw_SUBB(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_MAC:
      brw_MAC(p, dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFE:
      assert(brw->gen >= 7);
      brw_BFE(p, dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFI1:
      assert(brw->gen >= 7);
      brw_BFI1(p, dst, src[0], src[1]);
      break;
   case BRW_OPCODE_BFI2:
      assert(brw->gen >= 7);
      brw_BFI2(p, dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_IF:
      if (inst->src[0].file != BAD_FILE) {
         /* The instruction has an embedded compare (only allowed on gen6) */
         assert(brw->gen == 6);
         gen6_IF(p, inst->conditional_mod, src[0], src[1]);
      } else {
         brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8);
         brw_inst_set_pred_control(brw, if_inst, inst->predicate);
      }
      break;

   case BRW_OPCODE_ELSE:
      brw_ELSE(p);
      break;
   case BRW_OPCODE_ENDIF:
      brw_ENDIF(p);
      break;

   case BRW_OPCODE_DO:
      brw_DO(p, BRW_EXECUTE_8);
      break;

   case BRW_OPCODE_BREAK:
      brw_BREAK(p);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      break;
   case BRW_OPCODE_CONTINUE:
      brw_CONT(p);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      break;

   case BRW_OPCODE_WHILE:
      brw_WHILE(p);
      break;

   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      if (brw->gen >= 7) {
         gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
                   brw_null_reg());
      } else if (brw->gen == 6) {
	 generate_math_gen6(inst, dst, src[0], brw_null_reg());
      } else {
	 generate_math1_gen4(inst, dst, src[0]);
      }
      break;

   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_INT_QUOTIENT:
   case SHADER_OPCODE_INT_REMAINDER:
      if (brw->gen >= 7) {
         gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
      } else if (brw->gen == 6) {
	 generate_math_gen6(inst, dst, src[0], src[1]);
      } else {
	 generate_math2_gen4(inst, dst, src[0], src[1]);
      }
      break;

   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
   case SHADER_OPCODE_TG4:
   case SHADER_OPCODE_TG4_OFFSET:
      generate_tex(inst, dst, src[0], src[1]);
      break;

   case VS_OPCODE_URB_WRITE:
      generate_vs_urb_write(inst);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      generate_scratch_read(inst, dst, src[0]);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      generate_scratch_write(inst, dst, src[0], src[1]);
      break;

   case VS_OPCODE_PULL_CONSTANT_LOAD:
      generate_pull_constant_load(inst, dst, src[0], src[1]);
      break;

   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
      break;

   case GS_OPCODE_URB_WRITE:
      generate_gs_urb_write(inst);
      break;

   case GS_OPCODE_THREAD_END:
      generate_gs_thread_end(inst);
      break;

   case GS_OPCODE_SET_WRITE_OFFSET:
      generate_gs_set_write_offset(dst, src[0], src[1]);
      break;

   case GS_OPCODE_SET_VERTEX_COUNT:
      generate_gs_set_vertex_count(dst, src[0]);
      break;

   case GS_OPCODE_SET_DWORD_2_IMMED:
      generate_gs_set_dword_2_immed(dst, src[0]);
      break;

   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      generate_gs_prepare_channel_masks(dst);
      break;

   case GS_OPCODE_SET_CHANNEL_MASKS:
      generate_gs_set_channel_masks(dst, src[0]);
      break;

   case GS_OPCODE_GET_INSTANCE_ID:
      generate_gs_get_instance_id(dst);
      break;

   case SHADER_OPCODE_SHADER_TIME_ADD:
      brw_shader_time_add(p, src[0],
                          prog_data->base.binding_table.shader_time_start);
      brw_mark_surface_used(&prog_data->base,
                            prog_data->base.binding_table.shader_time_start);
      break;

   case SHADER_OPCODE_UNTYPED_ATOMIC:
      generate_untyped_atomic(inst, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      generate_untyped_surface_read(inst, dst, src[0]);
      break;

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      generate_unpack_flags(inst, dst);
      break;

   default:
      if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
         _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n",
                       opcode_descs[inst->opcode].name);
      } else {
         _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode);
      }
      abort();
   }
}
Beispiel #18
0
void
vec4_generator::generate_scratch_write(vec4_instruction *inst,
                                       struct brw_reg dst,
                                       struct brw_reg src,
                                       struct brw_reg index)
{
   struct brw_reg header = brw_vec8_grf(0, 0);
   bool write_commit;

   /* If the instruction is predicated, we'll predicate the send, not
    * the header setup.
    */
   brw_set_default_predicate_control(p, false);

   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
				     index);

   brw_MOV(p,
	   retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
	   retype(src, BRW_REGISTER_TYPE_D));

   uint32_t msg_type;

   if (brw->gen >= 7)
      msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else if (brw->gen == 6)
      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else
      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;

   brw_set_default_predicate_control(p, inst->predicate);

   /* Pre-gen6, we have to specify write commits to ensure ordering
    * between reads and writes within a thread.  Afterwards, that's
    * guaranteed and write commits only matter for inter-thread
    * synchronization.
    */
   if (brw->gen >= 6) {
      write_commit = false;
   } else {
      /* The visitor set up our destination register to be g0.  This
       * means that when the next read comes along, we will end up
       * reading from g0 and causing a block on the write commit.  For
       * write-after-read, we are relying on the value of the previous
       * read being used (and thus blocking on completion) before our
       * write is executed.  This means we have to be careful in
       * instruction scheduling to not violate this assumption.
       */
      write_commit = true;
   }

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (brw->gen < 6)
      brw_inst_set_cond_modifier(brw, send, inst->base_mrf);
   brw_set_dp_write_message(p, send,
			    255, /* binding table index: stateless access */
			    BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			    msg_type,
			    3, /* mlen */
			    true, /* header present */
			    false, /* not a render target write */
			    write_commit, /* rlen */
			    false, /* eot */
			    write_commit);
}