Esempio n. 1
0
void
vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
                                             struct brw_reg src)
{
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);

   if (brw->gen >= 8) {
      /* Move the vertex count into the second MRF for the EOT write. */
      brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
              src);
   } else {
      /* If we think of the src and dst registers as composed of 8 DWORDs each,
       * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
       * them to WORDs, and then pack them into DWORD 2 of dst.
       *
       * It's easier to get the EU to do this if we think of the src and dst
       * registers as composed of 16 WORDS each; then, we want to pick up the
       * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
       * of dst.
       *
       * We can do that by the following EU instruction:
       *
       *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
       */
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_MOV(p,
              suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
              stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
      brw_set_default_access_mode(p, BRW_ALIGN_16);
   }
   brw_pop_insn_state(p);
}
Esempio n. 2
0
static bool
run_tests(struct brw_context *brw)
{
   bool fail = false;

   for (int i = 0; i < ARRAY_SIZE(tests); i++) {
      for (int align_16 = 0; align_16 <= 1; align_16++) {
	 struct brw_compile *p = rzalloc(NULL, struct brw_compile);
	 brw_init_compile(brw, p, p);

	 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
	 if (align_16)
	    brw_set_default_access_mode(p, BRW_ALIGN_16);
	 else
	    brw_set_default_access_mode(p, BRW_ALIGN_1);

	 tests[i].func(p);
	 assert(p->nr_insn == 1);

	 if (!test_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 if (!test_fuzz_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 ralloc_free(p);
      }
   }

   return fail;
}
Esempio n. 3
0
void
vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
                                             struct brw_reg src0,
                                             struct brw_reg src1)
{
   /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
    * Header: M0.3):
    *
    *     Slot 0 Offset. This field, after adding to the Global Offset field
    *     in the message descriptor, specifies the offset (in 256-bit units)
    *     from the start of the URB entry, as referenced by URB Handle 0, at
    *     which the data will be accessed.
    *
    * Similar text describes DWORD M0.4, which is slot 1 offset.
    *
    * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
    * of the register for geometry shader invocations 0 and 1) by the
    * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
    *
    * We can do this with the following EU instruction:
    *
    *     mul(2) dst.3<1>UD src0<8;2,4>UD src1   { Align1 WE_all }
    */
   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
           src1);
   brw_set_default_access_mode(p, BRW_ALIGN_16);
   brw_pop_insn_state(p);
}
Esempio n. 4
0
void
vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
                                              struct brw_reg src)
{
   assert(src.file == BRW_IMMEDIATE_VALUE);

   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, suboffset(vec1(dst), 2), src);
   brw_set_default_access_mode(p, BRW_ALIGN_16);
   brw_pop_insn_state(p);
}
/* Project 'pos' to screen space (or back again), overwrite with results:
 */
void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
{
   struct brw_codegen *p = &c->func;

   /* calc rhw
    */
   brw_math_invert(p, get_element(pos, W), get_element(pos, W));

   /* value.xyz *= value.rhw
    */
   brw_set_default_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
   brw_set_default_access_mode(p, BRW_ALIGN_1);
}
Esempio n. 6
0
void
vec4_generator::generate_math_gen6(vec4_instruction *inst,
                                   struct brw_reg dst,
                                   struct brw_reg src0,
                                   struct brw_reg src1)
{
   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
   /* Source swizzles are ignored. */
   check_gen6_math_src_arg(src0);
   if (src1.file == BRW_GENERAL_REGISTER_FILE)
      check_gen6_math_src_arg(src1);

   brw_set_default_access_mode(p, BRW_ALIGN_1);
   gen6_math(p, dst, brw_math_function(inst->opcode), src0, src1);
   brw_set_default_access_mode(p, BRW_ALIGN_16);
}
Esempio n. 7
0
/* This is performed against the original triangles, so no indirection
 * required:
BZZZT!
 */
static void compute_tri_direction( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg e = c->reg.tmp0;
   struct brw_reg f = c->reg.tmp1;
   GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
   struct brw_reg v0 = byte_offset(c->reg.vertex[0], hpos_offset);
   struct brw_reg v1 = byte_offset(c->reg.vertex[1], hpos_offset);
   struct brw_reg v2 = byte_offset(c->reg.vertex[2], hpos_offset);


   struct brw_reg v0n = get_tmp(c);
   struct brw_reg v1n = get_tmp(c);
   struct brw_reg v2n = get_tmp(c);

   /* Convert to NDC.
    * NOTE: We can't modify the original vertex coordinates,
    * as it may impact further operations.
    * So, we have to keep normalized coordinates in temp registers.
    *
    * TBD-KC
    * Try to optimize unnecessary MOV's.
    */
   brw_MOV(p, v0n, v0);
   brw_MOV(p, v1n, v1);
   brw_MOV(p, v2n, v2);

   brw_clip_project_position(c, v0n);
   brw_clip_project_position(c, v1n);
   brw_clip_project_position(c, v2n);

   /* Calculate the vectors of two edges of the triangle:
    */
   brw_ADD(p, e, v0n, negate(v2n));
   brw_ADD(p, f, v1n, negate(v2n));

   /* Take their crossproduct:
    */
   brw_set_default_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3));
   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
   brw_set_default_access_mode(p, BRW_ALIGN_1);

   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
Esempio n. 8
0
const unsigned *
vec4_generator::generate_assembly(const cfg_t *cfg,
                                  unsigned *assembly_size)
{
   brw_set_default_access_mode(p, BRW_ALIGN_16);
   generate_code(cfg);

   return brw_get_program(p, assembly_size);
}
Esempio n. 9
0
const unsigned *
vec4_generator::generate_assembly(exec_list *instructions,
                                  unsigned *assembly_size)
{
    brw_set_default_access_mode(p, BRW_ALIGN_16);
    generate_code(instructions);

    return brw_get_program(p, assembly_size);
}
Esempio n. 10
0
static bool
run_tests(const struct gen_device_info *devinfo)
{
   brw_init_compaction_tables(devinfo);
   bool fail = false;

   for (unsigned i = 0; i < ARRAY_SIZE(tests); i++) {
      for (int align_16 = 0; align_16 <= 1; align_16++) {
	 struct brw_codegen *p = rzalloc(NULL, struct brw_codegen);
	 brw_init_codegen(devinfo, p, p);

	 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
	 if (align_16)
	    brw_set_default_access_mode(p, BRW_ALIGN_16);
	 else
	    brw_set_default_access_mode(p, BRW_ALIGN_1);

	 tests[i].func(p);
	 assert(p->nr_insn == 1);

	 if (!test_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 if (!test_fuzz_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 ralloc_free(p);
      }
   }

   return fail;
}
Esempio n. 11
0
void
vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
{
   /* We want to left shift just DWORD 4 (the x component belonging to the
    * second geometry shader invocation) by 4 bits.  So generate the
    * instruction:
    *
    *     shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
    */
   dst = suboffset(vec1(dst), 4);
   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_SHL(p, dst, dst, brw_imm_ud(4));
   brw_pop_insn_state(p);
}
Esempio n. 12
0
void
vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
{
   /* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT
    * and store into dst.0 & dst.4. So generate the instruction:
    *
    *     shr(8) dst<1> R0<1,4,0> GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT { align1 WE_normal 1Q }
    */
   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   dst = retype(dst, BRW_REGISTER_TYPE_UD);
   struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_SHR(p, dst, stride(r0, 1, 4, 0),
           brw_imm_ud(GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT));
   brw_pop_insn_state(p);
}
Esempio n. 13
0
void
vec4_generator::generate_unpack_flags(vec4_instruction *inst,
                                      struct brw_reg dst)
{
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_access_mode(p, BRW_ALIGN_1);

   struct brw_reg flags = brw_flag_reg(0, 0);
   struct brw_reg dst_0 = suboffset(vec1(dst), 0);
   struct brw_reg dst_4 = suboffset(vec1(dst), 4);

   brw_AND(p, dst_0, flags, brw_imm_ud(0x0f));
   brw_AND(p, dst_4, flags, brw_imm_ud(0xf0));
   brw_SHR(p, dst_4, dst_4, brw_imm_ud(4));

   brw_pop_insn_state(p);
}
Esempio n. 14
0
void
vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
                                                  struct brw_reg index)
{
   int second_vertex_offset;

   if (brw->gen >= 6)
      second_vertex_offset = 1;
   else
      second_vertex_offset = 16;

   m1 = retype(m1, BRW_REGISTER_TYPE_D);

   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
    * M1.4 are used, and the rest are ignored.
    */
   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
   struct brw_reg index_0 = suboffset(vec1(index), 0);
   struct brw_reg index_4 = suboffset(vec1(index), 4);

   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_access_mode(p, BRW_ALIGN_1);

   brw_MOV(p, m1_0, index_0);

   if (index.file == BRW_IMMEDIATE_VALUE) {
      index_4.dw1.ud += second_vertex_offset;
      brw_MOV(p, m1_4, index_4);
   } else {
      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
   }

   brw_pop_insn_state(p);
}
/* Interpolate between two vertices and put the result into a0.0.
 * Increment a0.0 accordingly.
 *
 * Beware that dest_ptr can be equal to v0_ptr!
 */
void brw_clip_interp_vertex( struct brw_clip_compile *c,
			     struct brw_indirect dest_ptr,
			     struct brw_indirect v0_ptr, /* from */
			     struct brw_indirect v1_ptr, /* to */
			     struct brw_reg t0,
			     bool force_edgeflag)
{
   struct brw_codegen *p = &c->func;
   struct brw_reg t_nopersp, v0_ndc_copy;
   GLuint slot;

   /* Just copy the vertex header:
    */
   /*
    * After CLIP stage, only first 256 bits of the VUE are read
    * back on Ironlake, so needn't change it
    */
   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);


   /* First handle the 3D and NDC interpolation, in case we
    * need noperspective interpolation. Doing it early has no
    * performance impact in any case.
    */

   /* Take a copy of the v0 NDC coordinates, in case dest == v0. */
   if (c->has_noperspective_shading) {
      GLuint offset = brw_varying_to_offset(&c->vue_map,
                                                 BRW_VARYING_SLOT_NDC);
      v0_ndc_copy = get_tmp(c);
      brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset));
   }

   /* Compute the new 3D position
    *
    * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0
    */
   {
      GLuint delta = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
      struct brw_reg tmp = get_tmp(c);
      brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0);
      brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0);
      brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp);
      release_tmp(c, tmp);
   }

   /* Recreate the projected (NDC) coordinate in the new vertex header */
   brw_clip_project_vertex(c, dest_ptr);

   /* If we have noperspective attributes,
    * we need to compute the screen-space t
    */
   if (c->has_noperspective_shading) {
      GLuint delta = brw_varying_to_offset(&c->vue_map,
                                                BRW_VARYING_SLOT_NDC);
      struct brw_reg tmp = get_tmp(c);
      t_nopersp = get_tmp(c);

      /* t_nopersp = vec4(v1.xy, dest.xy) */
      brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));
      brw_MOV(p, tmp, deref_4f(dest_ptr, delta));
      brw_set_default_access_mode(p, BRW_ALIGN_16);
      brw_MOV(p,
              brw_writemask(t_nopersp, WRITEMASK_ZW),
              brw_swizzle(tmp, 0, 1, 0, 1));

      /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */
      brw_ADD(p, t_nopersp, t_nopersp,
              negate(brw_swizzle(v0_ndc_copy, 0, 1, 0, 1)));

      /* Add the absolute values of the X and Y deltas so that if
       * the points aren't in the same place on the screen we get
       * nonzero values to divide.
       *
       * After that, we have vert1 - vert0 in t_nopersp.x and
       * vertnew - vert0 in t_nopersp.y
       *
       * t_nopersp = vec2(|v1.x  -v0.x| + |v1.y  -v0.y|,
       *                  |dest.x-v0.x| + |dest.y-v0.y|)
       */
      brw_ADD(p,
              brw_writemask(t_nopersp, WRITEMASK_XY),
              brw_abs(brw_swizzle(t_nopersp, 0, 2, 0, 0)),
              brw_abs(brw_swizzle(t_nopersp, 1, 3, 0, 0)));
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* If the points are in the same place, just substitute a
       * value to avoid divide-by-zero
       */
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,
              vec1(t_nopersp),
              brw_imm_f(0));
      brw_IF(p, BRW_EXECUTE_1);
      brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0),
                                        brw_float_to_vf(0.0),
                                        brw_float_to_vf(0.0),
                                        brw_float_to_vf(0.0)));
      brw_ENDIF(p);

      /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
      brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0));
      brw_MUL(p, vec1(t_nopersp), vec1(t_nopersp),
            vec1(suboffset(t_nopersp, 1)));
      brw_set_default_access_mode(p, BRW_ALIGN_16);
      brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, 0, 0, 0, 0));
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      release_tmp(c, tmp);
      release_tmp(c, v0_ndc_copy);
   }

   /* Now we can iterate over each attribute
    * (could be done in pairs?)
    */
   for (slot = 0; slot < c->vue_map.num_slots; slot++) {
      int varying = c->vue_map.slot_to_varying[slot];
      GLuint delta = brw_vue_slot_to_offset(slot);

      /* HPOS, NDC already handled above */
      if (varying == VARYING_SLOT_POS || varying == BRW_VARYING_SLOT_NDC)
         continue;


      if (varying == VARYING_SLOT_EDGE) {
	 if (force_edgeflag)
	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
	 else
	    brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
      } else if (varying == VARYING_SLOT_PSIZ) {
         /* PSIZ doesn't need interpolation because it isn't used by the
          * fragment shader.
          */
      } else if (varying < VARYING_SLOT_MAX) {
	 /* This is a true vertex result (and not a special value for the VUE
	  * header), so interpolate:
	  *
	  *        New = attr0 + t*attr1 - t*attr0
          *
          * Unless the attribute is flat shaded -- in which case just copy
          * from one of the sources (doesn't matter which; already copied from pv)
	  */
         GLuint interp = c->key.interpolation_mode.mode[slot];

         if (interp != INTERP_QUALIFIER_FLAT) {
            struct brw_reg tmp = get_tmp(c);
            struct brw_reg t =
               interp == INTERP_QUALIFIER_NOPERSPECTIVE ? t_nopersp : t0;

            brw_MUL(p,
                  vec4(brw_null_reg()),
                  deref_4f(v1_ptr, delta),
                  t);

            brw_MAC(p,
                  tmp,
                  negate(deref_4f(v0_ptr, delta)),
                  t);

            brw_ADD(p,
                  deref_4f(dest_ptr, delta),
                  deref_4f(v0_ptr, delta),
                  tmp);

            release_tmp(c, tmp);
         }
         else {
            brw_MOV(p,
                  deref_4f(dest_ptr, delta),
                  deref_4f(v0_ptr, delta));
         }
      }
   }

   if (c->vue_map.num_slots % 2) {
      GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);

      brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
   }

   if (c->has_noperspective_shading)
      release_tmp(c, t_nopersp);
}
Esempio n. 16
0
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 */
void
gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_codegen *p = &c->func;
   brw_inst *inst;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_ff_gs_alloc_regs(c, num_verts, true);
   brw_ff_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         inst = brw_MOV(p, destination_indices_uw,
                        brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                                : 0x00020001)); /* (1, 0, 2) */
         brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL);
      }

      assert(c->reg.destination_indices.width == BRW_EXECUTE_4);
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_4);
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
      brw_pop_insn_state(p);
      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            brw_set_default_access_mode(p, BRW_ALIGN_16);
            brw_push_insn_state(p);
            brw_set_default_exec_size(p, BRW_EXECUTE_4);

            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_pop_insn_state(p);

            brw_set_default_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_ff_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple “mov” instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_ff_gs_ff_sync(c, 1);

   brw_ff_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_ff_gs_offset_header_dw2(c,
                                  URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_ff_gs_offset_header_dw2(c,
                                  URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
         brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_ff_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
Esempio n. 17
0
void
vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
                                                 struct brw_reg dst,
                                                 struct brw_reg surf_index,
                                                 struct brw_reg offset)
{
   assert(surf_index.type == BRW_REGISTER_TYPE_UD);

   if (surf_index.file == BRW_IMMEDIATE_VALUE) {

      brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn, dst);
      brw_set_src0(p, insn, offset);
      brw_set_sampler_message(p, insn,
                              surf_index.dw1.ud,
                              0, /* LD message ignores sampler unit */
                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                              1, /* rlen */
                              1, /* mlen */
                              false, /* no header */
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              0);

      brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);

   } else {

      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* a0.0 = surf_index & 0xff */
      brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
      brw_inst_set_exec_size(p->brw, insn_and, BRW_EXECUTE_1);
      brw_set_dest(p, insn_and, addr);
      brw_set_src0(p, insn_and, vec1(retype(surf_index, BRW_REGISTER_TYPE_UD)));
      brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));


      /* a0.0 |= <descriptor> */
      brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR);
      brw_set_sampler_message(p, insn_or,
                              0 /* surface */,
                              0 /* sampler */,
                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                              1 /* rlen */,
                              1 /* mlen */,
                              false /* header */,
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              0);
      brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
      brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
      brw_set_src0(p, insn_or, addr);
      brw_set_dest(p, insn_or, addr);


      /* dst = send(offset, a0.0) */
      brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn_send, dst);
      brw_set_src0(p, insn_send, offset);
      brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);

      brw_pop_insn_state(p);

      /* visitor knows more than we do about the surface limit required,
       * so has already done marking.
       */
   }
}
Esempio n. 18
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src)
{
    int msg_type = -1;

    if (brw->gen >= 5) {
        switch (inst->opcode) {
        case SHADER_OPCODE_TEX:
        case SHADER_OPCODE_TXL:
            if (inst->shadow_compare) {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
            } else {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
            }
            break;
        case SHADER_OPCODE_TXD:
            if (inst->shadow_compare) {
                /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
                assert(brw->gen >= 8 || brw->is_haswell);
                msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
            } else {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
            }
            break;
        case SHADER_OPCODE_TXF:
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
            break;
        case SHADER_OPCODE_TXF_CMS:
            if (brw->gen >= 7)
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
            else
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
            break;
        case SHADER_OPCODE_TXF_MCS:
            assert(brw->gen >= 7);
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
            break;
        case SHADER_OPCODE_TXS:
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
            break;
        case SHADER_OPCODE_TG4:
            if (inst->shadow_compare) {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
            } else {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
            }
            break;
        case SHADER_OPCODE_TG4_OFFSET:
            if (inst->shadow_compare) {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
            } else {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
            }
            break;
        default:
            unreachable("should not get here: invalid vec4 texture opcode");
        }
    } else {
        switch (inst->opcode) {
        case SHADER_OPCODE_TEX:
        case SHADER_OPCODE_TXL:
            if (inst->shadow_compare) {
                msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
                assert(inst->mlen == 3);
            } else {
                msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
                assert(inst->mlen == 2);
            }
            break;
        case SHADER_OPCODE_TXD:
            /* There is no sample_d_c message; comparisons are done manually. */
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
            assert(inst->mlen == 4);
            break;
        case SHADER_OPCODE_TXF:
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
            assert(inst->mlen == 2);
            break;
        case SHADER_OPCODE_TXS:
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
            assert(inst->mlen == 2);
            break;
        default:
            unreachable("should not get here: invalid vec4 texture opcode");
        }
    }

    assert(msg_type != -1);

    /* Load the message header if present.  If there's a texture offset, we need
     * to set it up explicitly and load the offset bitfield.  Otherwise, we can
     * use an implied move from g0 to the first message register.
     */
    if (inst->header_present) {
        if (brw->gen < 6 && !inst->texture_offset) {
            /* Set up an implied move from g0 to the MRF. */
            src = brw_vec8_grf(0, 0);
        } else {
            struct brw_reg header =
                retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);

            /* Explicitly set up the message header by copying g0 to the MRF. */
            brw_push_insn_state(p);
            brw_set_default_mask_control(p, BRW_MASK_DISABLE);
            brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

            brw_set_default_access_mode(p, BRW_ALIGN_1);

            if (inst->texture_offset) {
                /* Set the texel offset bits in DWord 2. */
                brw_MOV(p, get_element_ud(header, 2),
                        brw_imm_ud(inst->texture_offset));
            }

            if (inst->sampler >= 16) {
                /* The "Sampler Index" field can only store values between 0 and 15.
                 * However, we can add an offset to the "Sampler State Pointer"
                 * field, effectively selecting a different set of 16 samplers.
                 *
                 * The "Sampler State Pointer" needs to be aligned to a 32-byte
                 * offset, and each sampler state is only 16-bytes, so we can't
                 * exclusively use the offset - we have to use both.
                 */
                assert(brw->gen >= 8 || brw->is_haswell);
                brw_ADD(p,
                        get_element_ud(header, 3),
                        get_element_ud(brw_vec8_grf(0, 0), 3),
                        brw_imm_ud(16 * (inst->sampler / 16) *
                                   sizeof(gen7_sampler_state)));
            }
            brw_pop_insn_state(p);
        }
    }

    uint32_t return_format;

    switch (dst.type) {
    case BRW_REGISTER_TYPE_D:
        return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
        break;
    case BRW_REGISTER_TYPE_UD:
        return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
        break;
    default:
        return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
        break;
    }

    uint32_t surface_index = ((inst->opcode == SHADER_OPCODE_TG4 ||
                               inst->opcode == SHADER_OPCODE_TG4_OFFSET)
                              ? prog_data->base.binding_table.gather_texture_start
                              : prog_data->base.binding_table.texture_start) + inst->sampler;

    brw_SAMPLE(p,
               dst,
               inst->base_mrf,
               src,
               surface_index,
               inst->sampler % 16,
               msg_type,
               1, /* response length */
               inst->mlen,
               inst->header_present,
               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
               return_format);

    brw_mark_surface_used(&prog_data->base, surface_index);
}
Esempio n. 19
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src,
                             struct brw_reg sampler_index)
{
   int msg_type = -1;

   if (brw->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
	 }
	 break;
      case SHADER_OPCODE_TXD:
         if (inst->shadow_compare) {
            /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
            assert(brw->gen >= 8 || brw->is_haswell);
            msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         }
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
	 break;
      case SHADER_OPCODE_TXF_CMS:
         if (brw->gen >= 7)
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
         else
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_MCS:
         assert(brw->gen >= 7);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
         break;
      case SHADER_OPCODE_TXS:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
	 break;
      case SHADER_OPCODE_TG4:
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
         }
         break;
      case SHADER_OPCODE_TG4_OFFSET:
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
         }
         break;
      default:
	 unreachable("should not get here: invalid vec4 texture opcode");
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
	    assert(inst->mlen == 3);
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
	    assert(inst->mlen == 2);
	 }
	 break;
      case SHADER_OPCODE_TXD:
	 /* There is no sample_d_c message; comparisons are done manually. */
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
	 assert(inst->mlen == 4);
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
	 assert(inst->mlen == 2);
	 break;
      case SHADER_OPCODE_TXS:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
	 assert(inst->mlen == 2);
	 break;
      default:
	 unreachable("should not get here: invalid vec4 texture opcode");
      }
   }

   assert(msg_type != -1);

   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);

   /* Load the message header if present.  If there's a texture offset, we need
    * to set it up explicitly and load the offset bitfield.  Otherwise, we can
    * use an implied move from g0 to the first message register.
    */
   if (inst->header_present) {
      if (brw->gen < 6 && !inst->texture_offset) {
         /* Set up an implied move from g0 to the MRF. */
         src = brw_vec8_grf(0, 0);
      } else {
         struct brw_reg header =
            retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);

         /* Explicitly set up the message header by copying g0 to the MRF. */
         brw_push_insn_state(p);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

         brw_set_default_access_mode(p, BRW_ALIGN_1);

         if (inst->texture_offset) {
            /* Set the texel offset bits in DWord 2. */
            brw_MOV(p, get_element_ud(header, 2),
                    brw_imm_ud(inst->texture_offset));
         }

         brw_adjust_sampler_state_pointer(p, header, sampler_index, dst);
         brw_pop_insn_state(p);
      }
   }

   uint32_t return_format;

   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
         inst->opcode == SHADER_OPCODE_TG4_OFFSET)
         ? prog_data->base.binding_table.gather_texture_start
         : prog_data->base.binding_table.texture_start;

   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
      uint32_t sampler = sampler_index.dw1.ud;

      brw_SAMPLE(p,
                 dst,
                 inst->base_mrf,
                 src,
                 sampler + base_binding_table_index,
                 sampler % 16,
                 msg_type,
                 1, /* response length */
                 inst->mlen,
                 inst->header_present,
                 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                 return_format);

      brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index);
   } else {
      /* Non-constant sampler index. */
      /* Note: this clobbers `dst` as a temporary before emitting the send */

      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
      struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));

      struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* Some care required: `sampler` and `temp` may alias:
       *    addr = sampler & 0xff
       *    temp = (sampler << 8) & 0xf00
       *    addr = addr | temp
       */
      brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
      brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
      brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
      brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
      brw_OR(p, addr, addr, temp);

      /* a0.0 |= <descriptor> */
      brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR);
      brw_set_sampler_message(p, insn_or,
                              0 /* surface */,
                              0 /* sampler */,
                              msg_type,
                              1 /* rlen */,
                              inst->mlen /* mlen */,
                              inst->header_present /* header */,
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              return_format);
      brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
      brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
      brw_set_src0(p, insn_or, addr);
      brw_set_dest(p, insn_or, addr);


      /* dst = send(offset, a0.0) */
      brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn_send, dst);
      brw_set_src0(p, insn_send, src);
      brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);

      brw_pop_insn_state(p);

      /* visitor knows more than we do about the surface limit required,
       * so has already done marking.
       */
   }
}
Esempio n. 20
0
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      pc_coord_replace = calculate_point_sprite_mask(c, i);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc_coord_replace);
	 /* Caculate 1.0/PointWidth */
	 gen4_math(&c->func,
		   c->tmp,
		   BRW_MATH_FUNCTION_INV,
		   0,
		   c->dx0,
		   BRW_MATH_DATA_SCALAR,
		   BRW_MATH_PRECISION_FULL);

	 brw_set_default_access_mode(p, BRW_ALIGN_16);

	 /* dA/dx, dA/dy */
	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
	 } else {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
	 }

	 /* attribute constant offset */
	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
	 } else {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
	 }

	 brw_set_default_access_mode(p, BRW_ALIGN_1);
      }

      if (pc & ~pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
	 brw_MOV(p, c->m3C0, a0); /* constant value */
      }


      set_predicate_control_flag_value(p, c, pc);
      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
		    brw_null_reg(),
		    0,
		    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                    : BRW_URB_WRITE_NO_FLAGS,
		    4, 	/* msg len */
		    0,	/* response len */
		    i*4,	/* urb destination offset */
		    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
Esempio n. 21
0
void
vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
                                              struct brw_reg src)
{
   /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
    * Header: M0.5):
    *
    *     15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
    *
    *        When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
    *        DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
    *        Vertex 0 DATA[7].  This bit is ANDed with the corresponding
    *        channel enable to determine the final channel enable.  For the
    *        URB_READ_OWORD & URB_READ_HWORD messages, when final channel
    *        enable is 1 it indicates that Vertex 1 DATA [3] will be included
    *        in the writeback message.  For the URB_WRITE_OWORD &
    *        URB_WRITE_HWORD messages, when final channel enable is 1 it
    *        indicates that Vertex 1 DATA [3] will be written to the surface.
    *
    *        0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
    *        1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
    *
    *     14 Vertex 1 DATA [2] Channel Mask
    *     13 Vertex 1 DATA [1] Channel Mask
    *     12 Vertex 1 DATA [0] Channel Mask
    *     11 Vertex 0 DATA [3] Channel Mask
    *     10 Vertex 0 DATA [2] Channel Mask
    *      9 Vertex 0 DATA [1] Channel Mask
    *      8 Vertex 0 DATA [0] Channel Mask
    *
    * (This is from a section of the PRM that is agnostic to the particular
    * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
    * geometry shader invocations 0 and 1, respectively).  Since we have the
    * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
    * and the enable flags for geometry shader invocation 1 in bits 7:0 of
    * DWORD 4, we just need to OR them together and store the result in bits
    * 15:8 of DWORD 5.
    *
    * It's easier to get the EU to do this if we think of the src and dst
    * registers as composed of 32 bytes each; then, we want to pick up the
    * contents of bytes 0 and 16 from src, OR them together, and store them in
    * byte 21.
    *
    * We can do that by the following EU instruction:
    *
    *     or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
    *
    * Note: this relies on the source register having zeros in (a) bits 7:4 of
    * DWORD 0 and (b) bits 3:0 of DWORD 4.  We can rely on (b) because the
    * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
    * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
    * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
    * contain valid channel mask values (which are in the range 0x0-0xf).
    */
   dst = retype(dst, BRW_REGISTER_TYPE_UB);
   src = retype(src, BRW_REGISTER_TYPE_UB);
   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
   brw_pop_insn_state(p);
}