Exemplo n.º 1
0
static void merge_edgeflags( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);

   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
   brw_CMP(p, 
	   vec1(brw_null_reg()), 
	   BRW_CONDITIONAL_EQ, 
	   tmp0,
	   brw_imm_ud(_3DPRIM_POLYGON));

   /* Get away with using reg.vertex because we know that this is not
    * a _3DPRIM_TRISTRIP_REVERSE:
    */
   brw_IF(p, BRW_EXECUTE_1);
   {   
      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
      brw_MOV(p, byte_offset(c->reg.vertex[0],
                             brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_EDGE)),
              brw_imm_f(0));
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
      brw_MOV(p, byte_offset(c->reg.vertex[2],
                             brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_EDGE)),
              brw_imm_f(0));
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   }
   brw_ENDIF(p);
}
static void emit_min(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg src0, src1, dst;
    int i;
    brw_push_insn_state(p);
    for (i = 0; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_MOV(p, dst, src0);
	    brw_set_saturate(p, 0);

	    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
	    brw_MOV(p, dst, src1);
	    brw_set_saturate(p, 0);
	    brw_set_predicate_control_flag_value(p, 0xff);
	}
    }
    brw_pop_insn_state(p);
}
Exemplo n.º 3
0
static bool
run_tests(struct brw_context *brw)
{
   bool fail = false;

   for (int i = 0; i < ARRAY_SIZE(tests); i++) {
      for (int align_16 = 0; align_16 <= 1; align_16++) {
	 struct brw_compile *p = rzalloc(NULL, struct brw_compile);
	 brw_init_compile(brw, p, p);

	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 if (align_16)
	    brw_set_access_mode(p, BRW_ALIGN_16);
	 else
	    brw_set_access_mode(p, BRW_ALIGN_1);

	 tests[i].func(p);
	 assert(p->nr_insn == 1);

	 if (!test_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 if (!test_fuzz_compact_instruction(p, p->store[0])) {
	    fail = true;
	    continue;
	 }

	 ralloc_free(p);
      }
   }

   return fail;
}
Exemplo n.º 4
0
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    if (c->need_ff_sync) {
        struct brw_compile *p = &c->func;
        struct brw_instruction *need_ff_sync;

        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p, 
                    c->reg.R0,
                    0,
                    c->reg.R0,
                    1,	
                    1,		/* used */
                    1,  	/* msg length */
                    1,		/* response length */
                    0,		/* eot */
                    1,		/* write compelete */
                    0,		/* urb offset */
                    BRW_URB_SWIZZLE_NONE);
        }
        brw_ENDIF(p, need_ff_sync);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
static void emit_lit( struct brw_compile *p, 
		      const struct brw_reg *dst,
		      GLuint mask,
		      const struct brw_reg *arg0 )
{
   assert((mask & WRITEMASK_XW) == 0);

   if (mask & WRITEMASK_Y) {
      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_MOV(p, dst[1], arg0[0]);
      brw_set_saturate(p, 0);
   }

   if (mask & WRITEMASK_Z) {
      emit_math2(p, BRW_MATH_FUNCTION_POW,
		 &dst[2],
		 WRITEMASK_X | (mask & SATURATE),
		 &arg0[1],
		 &arg0[3]);
   }

   /* Ordinarily you'd use an iff statement to skip or shortcircuit
    * some of the POW calculations above, but 16-wide iff statements
    * seem to lock c1 hardware, so this is a nasty workaround:
    */
   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
   {
      if (mask & WRITEMASK_Y) 
	 brw_MOV(p, dst[1], brw_imm_f(0));

      if (mask & WRITEMASK_Z) 
	 brw_MOV(p, dst[2], brw_imm_f(0)); 
   }
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
Exemplo n.º 6
0
void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
    /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
     * "Message Payload":
     *
     * "Operand0[7].  For the INT DIV functions, this operand is the
     *  denominator."
     *  ...
     * "Operand1[7].  For the INT DIV functions, this operand is the
     *  numerator."
     */
    bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
    struct brw_reg &op0 = is_int_div ? src1 : src0;
    struct brw_reg &op1 = is_int_div ? src0 : src1;

    brw_push_insn_state(p);
    brw_set_saturate(p, false);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
    brw_pop_insn_state(p);

    brw_math(p,
             dst,
             brw_math_function(inst->opcode),
             inst->base_mrf,
             op0,
             BRW_MATH_DATA_VECTOR,
             BRW_MATH_PRECISION_FULL);
}
Exemplo n.º 7
0
/* Points setup - several simplifications as all attributes are
 * constant across the face of the point (point sprites excluded!)
 */
void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);

   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */

   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {				
	 /* This seems odd as the values are all constant, but the
	  * fragment shader will be expecting it:
	  */
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }


      /* The delta values are always zero, just send the starting
       * coordinate.  Again, this is to fit in with the interpolation
       * code in the fragment shader.
       */
      {
	 set_predicate_control_flag_value(p, c, pc);

	 brw_MOV(p, c->m3C0, a0); /* constant value */

	 /* Copy m0..m3 to URB.
	  */
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* urb destination offset */
		       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
Exemplo n.º 8
0
static void
gen_f0_0_MOV_GRF_GRF(struct brw_compile *p)
{
   struct brw_reg g0 = brw_vec8_grf(0, 0);
   struct brw_reg g2 = brw_vec8_grf(2, 0);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, true);
   brw_MOV(p, g0, g2);
   brw_pop_insn_state(p);
}
Exemplo n.º 9
0
void emit_sop(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      GLuint cond,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1)
{
   GLuint i;

   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {	
	 brw_push_insn_state(p);
	 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 brw_MOV(p, dst[i], brw_imm_f(0));
	 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
	 brw_MOV(p, dst[i], brw_imm_f(1.0));
	 brw_pop_insn_state(p);
      }
   }
}
Exemplo n.º 10
0
void emit_sign(struct brw_compile *p,
	       const struct brw_reg *dst,
	       GLuint mask,
	       const struct brw_reg *arg0)
{
   GLuint i;

   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
	 brw_MOV(p, dst[i], brw_imm_f(0.0));

	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
	 brw_MOV(p, dst[i], brw_imm_f(-1.0));
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);

	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, arg0[i], brw_imm_f(0));
	 brw_MOV(p, dst[i], brw_imm_f(1.0));
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      }
   }
}
Exemplo n.º 11
0
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7.  Explicitly test
 * it, so that we run the fuzzing can run over all the other bits that might
 * interact with it.
 */
static void
gen_f0_1_MOV_GRF_GRF(struct brw_compile *p)
{
   struct brw_reg g0 = brw_vec8_grf(0, 0);
   struct brw_reg g2 = brw_vec8_grf(2, 0);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, true);
   struct brw_instruction *mov = brw_MOV(p, g0, g2);
   mov->bits2.da1.flag_subreg_nr = 1;
   brw_pop_insn_state(p);
}
Exemplo n.º 12
0
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 * If relAddr is true, we'll do an indirect fetch using the address register.
 */
void brw_dp_READ_4( struct brw_compile *p,
                    struct brw_reg dest,
                    GLboolean relAddr,
                    GLuint location,
                    GLuint bind_table_index )
{
   /* XXX: relAddr not implemented */
   GLuint msg_reg_nr = 1;
   {
      struct brw_reg b;
      brw_push_insn_state(p);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Setup MRF[1] with location/offset into const buffer */
      b = brw_message_reg(msg_reg_nr);
      b = retype(b, BRW_REGISTER_TYPE_UD);
      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
       */
      brw_MOV(p, b, brw_imm_ud(location));
      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      insn->header.compression_control = BRW_COMPRESSION_NONE; 
      insn->header.destreg__conditionalmod = msg_reg_nr;
      insn->header.mask_control = BRW_MASK_DISABLE;
  
      /* cast dest to a uword[8] vector */
      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

      brw_set_dest(insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      bind_table_index,
			      0,  /* msg_control (0 means 1 Oword) */
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      0, /* source cache = data cache */
			      1, /* msg_length */
			      1, /* response_length (1 Oword) */
			      0); /* eot */
   }
}
static void emit_sop(struct brw_wm_compile *c,
		struct prog_instruction *inst, GLuint cond)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg dst, src0, src1;
    int i;

    for (i = 0; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
	    brw_push_insn_state(p);
	    brw_CMP(p, brw_null_reg(), cond, src0, src1);
	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	    brw_MOV(p, dst, brw_imm_f(0.0));
	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
	    brw_MOV(p, dst, brw_imm_f(1.0));
	    brw_pop_insn_state(p);
	}
    }
}
/**
 * Sets VERT_RESULT_FOGC.Y  for gl_FrontFacing
 *
 * This is currently executed if the fragment program uses VERT_RESULT_FOGC
 * at all, but this could be eliminated with a scan of the FP contents.
 */
static void
do_front_facing( struct brw_sf_compile *c )
{
   struct brw_compile *p = &c->func; 
   int i;

   if (!have_attr(c, VERT_RESULT_FOGC))
      return;

   brw_push_insn_state(p);
   brw_CMP(p, brw_null_reg(), 
        c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L,
        c->det, brw_imm_f(0));
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   for (i = 0; i < 3; i++) {
       struct brw_reg fogc = get_vert_attr(c, c->vert[i],FRAG_ATTRIB_FOGC);
       brw_MOV(p, get_element(fogc, 1), brw_imm_f(0));
       brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
       brw_MOV(p, get_element(fogc, 1), brw_imm_f(1));
       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   }
   brw_pop_insn_state(p);
}
Exemplo n.º 15
0
/*
  GLfloat iz	= 1.0 / dir.z;
  GLfloat ac	= dir.x * iz;
  GLfloat bc	= dir.y * iz;
  offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
  offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
  offset *= MRD;
*/
static void compute_offset( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg off = c->reg.offset;
   struct brw_reg dir = c->reg.dir;
   
   brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
   brw_MUL(p, vec2(off), dir, get_element(off, 2));

   brw_CMP(p, 
	   vec1(brw_null_reg()), 
	   BRW_CONDITIONAL_GE,
	   brw_abs(get_element(off, 0)), 
	   brw_abs(get_element(off, 1)));

   brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
   brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
}
Exemplo n.º 16
0
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    struct intel_context *intel = &c->func.brw->intel;

    if (intel->needs_ff_sync) {
        struct brw_compile *p = &c->func;

        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p,
			c->reg.R0,
			0,
			c->reg.R0,
			1, /* allocate */
			1, /* response length */
			0 /* eot */);
        }
        brw_ENDIF(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
Exemplo n.º 17
0
/**
 * Generate assembly for a Vec4 IR instruction.
 *
 * \param instruction The Vec4 IR instruction to generate code for.
 * \param dst         The destination register.
 * \param src         An array of up to three source registers.
 */
void
vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
        struct brw_reg dst,
        struct brw_reg *src)
{
    vec4_instruction *inst = (vec4_instruction *) instruction;

    if (dst.width == BRW_WIDTH_4) {
        /* This happens in attribute fixups for "dual instanced" geometry
         * shaders, since they use attributes that are vec4's.  Since the exec
         * width is only 4, it's essential that the caller set
         * force_writemask_all in order to make sure the instruction is executed
         * regardless of which channels are enabled.
         */
        assert(inst->force_writemask_all);

        /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
         * the following register region restrictions (from Graphics BSpec:
         * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
         * > Register Region Restrictions)
         *
         *     1. ExecSize must be greater than or equal to Width.
         *
         *     2. If ExecSize = Width and HorzStride != 0, VertStride must be set
         *        to Width * HorzStride."
         */
        for (int i = 0; i < 3; i++) {
            if (src[i].file == BRW_GENERAL_REGISTER_FILE)
                src[i] = stride(src[i], 4, 4, 1);
        }
    }

    switch (inst->opcode) {
    case BRW_OPCODE_MOV:
        brw_MOV(p, dst, src[0]);
        break;
    case BRW_OPCODE_ADD:
        brw_ADD(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_MUL:
        brw_MUL(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_MACH:
        brw_set_acc_write_control(p, 1);
        brw_MACH(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;

    case BRW_OPCODE_MAD:
        assert(brw->gen >= 6);
        brw_MAD(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_FRC:
        brw_FRC(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDD:
        brw_RNDD(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDE:
        brw_RNDE(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDZ:
        brw_RNDZ(p, dst, src[0]);
        break;

    case BRW_OPCODE_AND:
        brw_AND(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_OR:
        brw_OR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_XOR:
        brw_XOR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_NOT:
        brw_NOT(p, dst, src[0]);
        break;
    case BRW_OPCODE_ASR:
        brw_ASR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_SHR:
        brw_SHR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_SHL:
        brw_SHL(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_CMP:
        brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
        break;
    case BRW_OPCODE_SEL:
        brw_SEL(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DPH:
        brw_DPH(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP4:
        brw_DP4(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP3:
        brw_DP3(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP2:
        brw_DP2(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_F32TO16:
        assert(brw->gen >= 7);
        brw_F32TO16(p, dst, src[0]);
        break;

    case BRW_OPCODE_F16TO32:
        assert(brw->gen >= 7);
        brw_F16TO32(p, dst, src[0]);
        break;

    case BRW_OPCODE_LRP:
        assert(brw->gen >= 6);
        brw_LRP(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_BFREV:
        assert(brw->gen >= 7);
        /* BFREV only supports UD type for src and dst. */
        brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
                  retype(src[0], BRW_REGISTER_TYPE_UD));
        break;
    case BRW_OPCODE_FBH:
        assert(brw->gen >= 7);
        /* FBH only supports UD type for dst. */
        brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_FBL:
        assert(brw->gen >= 7);
        /* FBL only supports UD type for dst. */
        brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_CBIT:
        assert(brw->gen >= 7);
        /* CBIT only supports UD type for dst. */
        brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_ADDC:
        assert(brw->gen >= 7);
        brw_set_acc_write_control(p, 1);
        brw_ADDC(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;
    case BRW_OPCODE_SUBB:
        assert(brw->gen >= 7);
        brw_set_acc_write_control(p, 1);
        brw_SUBB(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;

    case BRW_OPCODE_BFE:
        assert(brw->gen >= 7);
        brw_BFE(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_BFI1:
        assert(brw->gen >= 7);
        brw_BFI1(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_BFI2:
        assert(brw->gen >= 7);
        brw_BFI2(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_IF:
        if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(brw->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
        } else {
            struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
            brw_inst->header.predicate_control = inst->predicate;
        }
        break;

    case BRW_OPCODE_ELSE:
        brw_ELSE(p);
        break;
    case BRW_OPCODE_ENDIF:
        brw_ENDIF(p);
        break;

    case BRW_OPCODE_DO:
        brw_DO(p, BRW_EXECUTE_8);
        break;

    case BRW_OPCODE_BREAK:
        brw_BREAK(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
        break;
    case BRW_OPCODE_CONTINUE:
        /* FINISHME: We need to write the loop instruction support still. */
        if (brw->gen >= 6)
            gen6_CONT(p);
        else
            brw_CONT(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
        break;

    case BRW_OPCODE_WHILE:
        brw_WHILE(p);
        break;

    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
    case SHADER_OPCODE_SQRT:
    case SHADER_OPCODE_EXP2:
    case SHADER_OPCODE_LOG2:
    case SHADER_OPCODE_SIN:
    case SHADER_OPCODE_COS:
        if (brw->gen == 6) {
            generate_math1_gen6(inst, dst, src[0]);
        } else {
            /* Also works for Gen7. */
            generate_math1_gen4(inst, dst, src[0]);
        }
        break;

    case SHADER_OPCODE_POW:
    case SHADER_OPCODE_INT_QUOTIENT:
    case SHADER_OPCODE_INT_REMAINDER:
        if (brw->gen >= 7) {
            generate_math2_gen7(inst, dst, src[0], src[1]);
        } else if (brw->gen == 6) {
            generate_math2_gen6(inst, dst, src[0], src[1]);
        } else {
            generate_math2_gen4(inst, dst, src[0], src[1]);
        }
        break;

    case SHADER_OPCODE_TEX:
    case SHADER_OPCODE_TXD:
    case SHADER_OPCODE_TXF:
    case SHADER_OPCODE_TXF_CMS:
    case SHADER_OPCODE_TXF_MCS:
    case SHADER_OPCODE_TXL:
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
        generate_tex(inst, dst, src[0]);
        break;

    case VS_OPCODE_URB_WRITE:
        generate_vs_urb_write(inst);
        break;

    case SHADER_OPCODE_GEN4_SCRATCH_READ:
        generate_scratch_read(inst, dst, src[0]);
        break;

    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
        generate_scratch_write(inst, dst, src[0], src[1]);
        break;

    case VS_OPCODE_PULL_CONSTANT_LOAD:
        generate_pull_constant_load(inst, dst, src[0], src[1]);
        break;

    case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
        generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
        break;

    case GS_OPCODE_URB_WRITE:
        generate_gs_urb_write(inst);
        break;

    case GS_OPCODE_THREAD_END:
        generate_gs_thread_end(inst);
        break;

    case GS_OPCODE_SET_WRITE_OFFSET:
        generate_gs_set_write_offset(dst, src[0], src[1]);
        break;

    case GS_OPCODE_SET_VERTEX_COUNT:
        generate_gs_set_vertex_count(dst, src[0]);
        break;

    case GS_OPCODE_SET_DWORD_2_IMMED:
        generate_gs_set_dword_2_immed(dst, src[0]);
        break;

    case GS_OPCODE_PREPARE_CHANNEL_MASKS:
        generate_gs_prepare_channel_masks(dst);
        break;

    case GS_OPCODE_SET_CHANNEL_MASKS:
        generate_gs_set_channel_masks(dst, src[0]);
        break;

    case GS_OPCODE_GET_INSTANCE_ID:
        generate_gs_get_instance_id(dst);
        break;

    case SHADER_OPCODE_SHADER_TIME_ADD:
        brw_shader_time_add(p, src[0],
                            prog_data->base.binding_table.shader_time_start);
        brw_mark_surface_used(&prog_data->base,
                              prog_data->base.binding_table.shader_time_start);
        break;

    case SHADER_OPCODE_UNTYPED_ATOMIC:
        generate_untyped_atomic(inst, dst, src[0], src[1]);
        break;

    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
        generate_untyped_surface_read(inst, dst, src[0]);
        break;

    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
        generate_unpack_flags(inst, dst);
        break;

    default:
        if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
            _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n",
                          opcode_descs[inst->opcode].name);
        } else {
            _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode);
        }
        abort();
    }
}
Exemplo n.º 18
0
void
vec4_generator::generate_scratch_write(vec4_instruction *inst,
                                       struct brw_reg dst,
                                       struct brw_reg src,
                                       struct brw_reg index)
{
    struct brw_reg header = brw_vec8_grf(0, 0);
    bool write_commit;

    /* If the instruction is predicated, we'll predicate the send, not
     * the header setup.
     */
    brw_set_predicate_control(p, false);

    gen6_resolve_implied_move(p, &header, inst->base_mrf);

    generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                      index);

    brw_MOV(p,
            retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
            retype(src, BRW_REGISTER_TYPE_D));

    uint32_t msg_type;

    if (brw->gen >= 7)
        msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
    else if (brw->gen == 6)
        msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
    else
        msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;

    brw_set_predicate_control(p, inst->predicate);

    /* Pre-gen6, we have to specify write commits to ensure ordering
     * between reads and writes within a thread.  Afterwards, that's
     * guaranteed and write commits only matter for inter-thread
     * synchronization.
     */
    if (brw->gen >= 6) {
        write_commit = false;
    } else {
        /* The visitor set up our destination register to be g0.  This
         * means that when the next read comes along, we will end up
         * reading from g0 and causing a block on the write commit.  For
         * write-after-read, we are relying on the value of the previous
         * read being used (and thus blocking on completion) before our
         * write is executed.  This means we have to be careful in
         * instruction scheduling to not violate this assumption.
         */
        write_commit = true;
    }

    /* Each of the 8 channel enables is considered for whether each
     * dword is written.
     */
    struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
    brw_set_dest(p, send, dst);
    brw_set_src0(p, send, header);
    if (brw->gen < 6)
        send->header.destreg__conditionalmod = inst->base_mrf;
    brw_set_dp_write_message(p, send,
                             255, /* binding table index: stateless access */
                             BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                             msg_type,
                             3, /* mlen */
                             true, /* header present */
                             false, /* not a render target write */
                             write_commit, /* rlen */
                             false, /* eot */
                             write_commit);
}
Exemplo n.º 19
0
void
vec4_generator::generate_code(exec_list *instructions)
{
    int last_native_insn_offset = 0;
    const char *last_annotation_string = NULL;
    const void *last_annotation_ir = NULL;

    if (unlikely(debug_flag)) {
        if (shader_prog) {
            fprintf(stderr, "Native code for %s vertex shader %d:\n",
                    shader_prog->Label ? shader_prog->Label : "unnamed",
                    shader_prog->Name);
        } else {
            fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
        }
    }

    foreach_list(node, instructions) {
        vec4_instruction *inst = (vec4_instruction *)node;
        struct brw_reg src[3], dst;

        if (unlikely(debug_flag)) {
            if (last_annotation_ir != inst->ir) {
                last_annotation_ir = inst->ir;
                if (last_annotation_ir) {
                    fprintf(stderr, "   ");
                    if (shader_prog) {
                        ((ir_instruction *) last_annotation_ir)->fprint(stderr);
                    } else {
                        const prog_instruction *vpi;
                        vpi = (const prog_instruction *) inst->ir;
                        fprintf(stderr, "%d: ", (int)(vpi - prog->Instructions));
                        _mesa_fprint_instruction_opt(stderr, vpi, 0,
                                                     PROG_PRINT_DEBUG, NULL);
                    }
                    fprintf(stderr, "\n");
                }
            }
            if (last_annotation_string != inst->annotation) {
                last_annotation_string = inst->annotation;
                if (last_annotation_string)
                    fprintf(stderr, "   %s\n", last_annotation_string);
            }
        }

        for (unsigned int i = 0; i < 3; i++) {
            src[i] = inst->get_src(this->prog_data, i);
        }
        dst = inst->get_dst();

        brw_set_conditionalmod(p, inst->conditional_mod);
        brw_set_predicate_control(p, inst->predicate);
        brw_set_predicate_inverse(p, inst->predicate_inverse);
        brw_set_saturate(p, inst->saturate);
        brw_set_mask_control(p, inst->force_writemask_all);

        unsigned pre_emit_nr_insn = p->nr_insn;

        generate_vec4_instruction(inst, dst, src);

        if (inst->no_dd_clear || inst->no_dd_check) {
            assert(p->nr_insn == pre_emit_nr_insn + 1 ||
                   !"no_dd_check or no_dd_clear set for IR emitting more "
                   "than 1 instruction");

            struct brw_instruction *last = &p->store[pre_emit_nr_insn];

            if (inst->no_dd_clear)
                last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
            if (inst->no_dd_check)
                last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
        }

        if (unlikely(debug_flag)) {
            brw_dump_compile(p, stderr,
                             last_native_insn_offset, p->next_insn_offset);
        }

        last_native_insn_offset = p->next_insn_offset;
    }
Exemplo n.º 20
0
void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 3;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.do_twoside_color)
      do_twoside_color(c);

   if (c->has_flat_shading)
      do_flatshade_triangle(c);


   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      struct brw_reg a2 = offset(c->vert[2], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
	 brw_MUL(p, a2, a2, c->inv_w[2]);
      }


      /* Calculate coefficients for interpolated values:
       */
      if (pc_linear)
      {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));

	 /* calculate dA/dx
	  */
	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
		
	 /* calculate dA/dy
	  */
	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);
	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
	  * the send instruction:
	  */	
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* offset */
		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
      }
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
Exemplo n.º 21
0
void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->has_flat_shading)
      do_flatshade_line(c);

   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color:
       */
      if (pc_linear) {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));

 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
		
	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);

	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.
	  */
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* urb destination offset */
		       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
Exemplo n.º 22
0
static void brw_clip_test( struct brw_clip_compile *c )
{
    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);

    struct brw_reg v0 = get_tmp(c);
    struct brw_reg v1 = get_tmp(c);
    struct brw_reg v2 = get_tmp(c);

    struct brw_indirect vt0 = brw_indirect(0, 0);
    struct brw_indirect vt1 = brw_indirect(1, 0);
    struct brw_indirect vt2 = brw_indirect(2, 0);

    struct brw_compile *p = &c->func;
    struct brw_instruction *is_outside;
    struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */

    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
    brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos));
    brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos));
    brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos));
    brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));

    /* test nearz, xmin, ymin plane */
    /* clip.xyz < -clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    is_outside = brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p, is_outside);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside,need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* test farz, xmax, ymax plane */
    /* clip.xyz > clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    is_outside = brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p, is_outside);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside,need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    release_tmps(c);
}
Exemplo n.º 23
0
void
vec4_generator::generate_code(exec_list *instructions)
{
   int last_native_insn_offset = 0;
   const char *last_annotation_string = NULL;
   const void *last_annotation_ir = NULL;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      if (shader) {
         printf("Native code for vertex shader %d:\n", prog->Name);
      } else {
         printf("Native code for vertex program %d:\n", c->vp->program.Base.Id);
      }
   }

   foreach_list(node, instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf("   ");
               if (shader) {
                  ((ir_instruction *) last_annotation_ir)->print();
               } else {
                  const prog_instruction *vpi;
                  vpi = (const prog_instruction *) inst->ir;
                  printf("%d: ", (int)(vpi - vp->Base.Instructions));
                  _mesa_fprint_instruction_opt(stdout, vpi, 0,
                                               PROG_PRINT_DEBUG, NULL);
               }
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
      }

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MACH:
	 brw_set_acc_write_control(p, 1);
	 brw_MACH(p, dst, src[0], src[1]);
	 brw_set_acc_write_control(p, 0);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DPH:
	 brw_DPH(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP4:
	 brw_DP4(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP3:
	 brw_DP3(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP2:
	 brw_DP2(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 if (inst->src[0].file != BAD_FILE) {
	    /* The instruction has an embedded compare (only allowed on gen6) */
	    assert(intel->gen == 6);
	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
	    brw_inst->header.predicate_control = inst->predicate;
	 }
	 break;

      case BRW_OPCODE_ELSE:
	 brw_ELSE(p);
	 break;
      case BRW_OPCODE_ENDIF:
	 brw_ENDIF(p);
	 break;

      case BRW_OPCODE_DO:
	 brw_DO(p, BRW_EXECUTE_8);
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* FINISHME: We need to write the loop instruction support still. */
	 if (intel->gen >= 6)
	    gen6_CONT(p);
	 else
	    brw_CONT(p);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE:
	 brw_WHILE(p);
	 break;

      default:
	 generate_vs_instruction(inst, dst, src);
	 break;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 brw_dump_compile(p, stdout,
			  last_native_insn_offset, p->next_insn_offset);
      }

      last_native_insn_offset = p->next_insn_offset;
   }
Exemplo n.º 24
0
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      pc_coord_replace = calculate_point_sprite_mask(c, i);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc_coord_replace);
	 /* Caculate 1.0/PointWidth */
	 brw_math(&c->func,
		  c->tmp,
		  BRW_MATH_FUNCTION_INV,
		  0,
		  c->dx0,
		  BRW_MATH_DATA_SCALAR,
		  BRW_MATH_PRECISION_FULL);

	 brw_set_access_mode(p, BRW_ALIGN_16);

	 /* dA/dx, dA/dy */
	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
	 } else {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
	 }

	 /* attribute constant offset */
	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
	 } else {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
	 }

	 brw_set_access_mode(p, BRW_ALIGN_1);
      }

      if (pc & ~pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
	 brw_MOV(p, c->m3C0, a0); /* constant value */
      }


      set_predicate_control_flag_value(p, c, pc);
      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
		    brw_null_reg(),
		    0,
		    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                    : BRW_URB_WRITE_NO_FLAGS,
		    4, 	/* msg len */
		    0,	/* response len */
		    i*4,	/* urb destination offset */
		    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
Exemplo n.º 25
0
/* Use mesa's clipping algorithms, translated to GEN4 assembly.
 */
void brw_clip_tri( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_indirect vtx = brw_indirect(0, 0);
   struct brw_indirect vtxPrev = brw_indirect(1, 0);
   struct brw_indirect vtxOut = brw_indirect(2, 0);
   struct brw_indirect plane_ptr = brw_indirect(3, 0);
   struct brw_indirect inlist_ptr = brw_indirect(4, 0);
   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
   struct brw_instruction *plane_loop;
   struct brw_instruction *plane_active;
   struct brw_instruction *vertex_loop;
   struct brw_instruction *next_test;
   struct brw_instruction *prev_test;
   
   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );
   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist));
   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));

   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );

   plane_loop = brw_DO(p, BRW_EXECUTE_1);
   {
      /* if (planemask & 1)
       */
      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
      
      plane_active = brw_IF(p, BRW_EXECUTE_1);
      {
	 /* vtxOut = freelist_ptr++ 
	  */
	 brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) );
	 brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));

	 if (c->key.nr_userclip)
	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
	 else
	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
	    
	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));

	 vertex_loop = brw_DO(p, BRW_EXECUTE_1);
	 {
	    /* vtx = *input_ptr;
	     */
	    brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));

	    /* IS_NEGATIVE(prev) */
	    brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation);
	    prev_test = brw_IF(p, BRW_EXECUTE_1);
	    {
	       /* IS_POSITIVE(next)
		*/
	       brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
	       next_test = brw_IF(p, BRW_EXECUTE_1);
	       {

		  /* Coming back in.
		   */
		  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
		  brw_math_invert(p, c->reg.t, c->reg.t);
		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);

		  /* If (vtxOut == 0) vtxOut = vtxPrev
		   */
		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);

		  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);

		  /* *outlist_ptr++ = vtxOut;
		   * nr_verts++; 
		   * vtxOut = 0;
		   */
		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
	       }
	       brw_ENDIF(p, next_test);
	       
	    }
	    prev_test = brw_ELSE(p, prev_test);
	    {
	       /* *outlist_ptr++ = vtxPrev;
		* nr_verts++;
		*/
	       brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
	       brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
	       brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));

	       /* IS_NEGATIVE(next)
		*/
	       brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
	       next_test = brw_IF(p, BRW_EXECUTE_1);
	       {
		  /* Going out of bounds.  Avoid division by zero as we
		   * know dp != dpPrev from DIFFERENT_SIGNS, above.
		   */
		  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
		  brw_math_invert(p, c->reg.t, c->reg.t);
		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);

		  /* If (vtxOut == 0) vtxOut = vtx
		   */
		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);

		  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);		  

		  /* *outlist_ptr++ = vtxOut;
		   * nr_verts++; 
		   * vtxOut = 0;
		   */
		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
	       } 	       
	       brw_ENDIF(p, next_test);
	    }
	    brw_ENDIF(p, prev_test);
	    
	    /* vtxPrev = vtx;
	     * inlist_ptr++;
	     */
	    brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
	    brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));

	    /* while (--loopcount != 0)
	     */
	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
	 } 
	 brw_WHILE(p, vertex_loop);

	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
	  * inlist = outlist
	  * inlist_ptr = &inlist[0]
	  * outlist_ptr = &outlist[0]
	  */
	 brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
	 brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
	 brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
      }
      brw_ENDIF(p, plane_active);
      
      /* plane_ptr++;
       */
      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));

      /* nr_verts >= 3 
       */
      brw_CMP(p,
	      vec1(brw_null_reg()),
	      BRW_CONDITIONAL_GE,
	      c->reg.nr_verts,
	      brw_imm_ud(3));
   
      /* && (planemask>>=1) != 0
       */
      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
   }
   brw_WHILE(p, plane_loop);
}
Exemplo n.º 26
0
/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint oword,
                      GLboolean relAddr,
                      struct brw_reg addrReg,
                      GLuint location,
                      GLuint bind_table_index)
{
   GLuint msg_reg_nr = 1;

   assert(oword < 2);
   /*
   printf("vs const read msg, location %u, msg_reg_nr %d\n",
          location, msg_reg_nr);
   */

   /* Setup MRF[1] with location/offset into const buffer */
   {
      struct brw_reg b;

      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      /*brw_set_access_mode(p, BRW_ALIGN_16);*/

      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
       */
      b = brw_message_reg(msg_reg_nr);
      b = retype(b, BRW_REGISTER_TYPE_UD);
      /*b = get_element_ud(b, 2);*/
      if (relAddr) {
         brw_ADD(p, b, addrReg, brw_imm_ud(location));
      }
      else {
         brw_MOV(p, b, brw_imm_ud(location));
      }

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      insn->header.compression_control = BRW_COMPRESSION_NONE; 
      insn->header.destreg__conditionalmod = msg_reg_nr;
      insn->header.mask_control = BRW_MASK_DISABLE;
      /*insn->header.access_mode = BRW_ALIGN_16;*/
  
      brw_set_dest(insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      bind_table_index,
			      oword,  /* 0 = lower Oword, 1 = upper Oword */
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      0, /* source cache = data cache */
			      1, /* msg_length */
			      1, /* response_length (1 Oword) */
			      0); /* eot */
   }
}
Exemplo n.º 27
0
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 */
void
gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_compile *p = &c->func;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_gs_alloc_regs(c, num_verts, true);
   brw_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         brw_MOV(p, destination_indices_uw,
                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                         : 0x00020001)); /* (1, 0, 2) */
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      }
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));

      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            brw_set_access_mode(p, BRW_ALIGN_16);
            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_set_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple “mov” instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_gs_ff_sync(c, 1);

   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
    * release the URB that was just allocated, and terminate the thread.
    */
   if (key->rasterizer_discard) {
      brw_gs_terminate(c);
      return;
   }

   brw_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
#define MAX_IFSN 32
#define MAX_LOOP_DEPTH 32
    struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
    struct brw_instruction *inst0, *inst1;
    int i, if_insn = 0, loop_insn = 0;
    struct brw_compile *p = &c->func;
    struct brw_indirect stack_index = brw_indirect(0, 0);

    c->reg_index = 0;
    prealloc_reg(c);
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));

    for (i = 0; i < c->nr_fp_insns; i++) {
	struct prog_instruction *inst = &c->prog_instructions[i];
	struct prog_instruction *orig_inst;

	if ((orig_inst = inst->Data) != 0)
	    orig_inst->Data = current_insn(p);

	if (inst->CondUpdate)
	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
	else
	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);

	switch (inst->Opcode) {
	    case WM_PIXELXY:
		emit_pixel_xy(c, inst);
		break;
	    case WM_DELTAXY: 
		emit_delta_xy(c, inst);
		break;
	    case WM_PIXELW:
		emit_pixel_w(c, inst);
		break;	
	    case WM_LINTERP:
		emit_linterp(c, inst);
		break;
	    case WM_PINTERP:
		emit_pinterp(c, inst);
		break;
	    case WM_CINTERP:
		emit_cinterp(c, inst);
		break;
	    case WM_WPOSXY:
		emit_wpos_xy(c, inst);
		break;
	    case WM_FB_WRITE:
		emit_fb_write(c, inst);
		break;
	    case OPCODE_ABS:
		emit_abs(c, inst);
		break;
	    case OPCODE_ADD:
		emit_add(c, inst);
		break;
	    case OPCODE_SUB:
		emit_sub(c, inst);
		break;
	    case OPCODE_FRC:
		emit_frc(c, inst);
		break;
	    case OPCODE_FLR:
		emit_flr(c, inst);
		break;
	    case OPCODE_LRP:
		emit_lrp(c, inst);
		break;
	    case OPCODE_INT:
		emit_int(c, inst);
		break;
	    case OPCODE_MOV:
		emit_mov(c, inst);
		break;
	    case OPCODE_DP3:
		emit_dp3(c, inst);
		break;
	    case OPCODE_DP4:
		emit_dp4(c, inst);
		break;
	    case OPCODE_XPD:
		emit_xpd(c, inst);
		break;
	    case OPCODE_DPH:
		emit_dph(c, inst);
		break;
	    case OPCODE_RCP:
		emit_rcp(c, inst);
		break;
	    case OPCODE_RSQ:
		emit_rsq(c, inst);
		break;
	    case OPCODE_SIN:
		emit_sin(c, inst);
		break;
	    case OPCODE_COS:
		emit_cos(c, inst);
		break;
	    case OPCODE_EX2:
		emit_ex2(c, inst);
		break;
	    case OPCODE_LG2:
		emit_lg2(c, inst);
		break;
	    case OPCODE_MAX:	
		emit_max(c, inst);
		break;
	    case OPCODE_MIN:	
		emit_min(c, inst);
		break;
	    case OPCODE_DDX:
		emit_ddx(c, inst);
		break;
	    case OPCODE_DDY:
                emit_ddy(c, inst);
                break;
	    case OPCODE_SLT:
		emit_slt(c, inst);
		break;
	    case OPCODE_SLE:
		emit_sle(c, inst);
		break;
	    case OPCODE_SGT:
		emit_sgt(c, inst);
		break;
	    case OPCODE_SGE:
		emit_sge(c, inst);
		break;
	    case OPCODE_SEQ:
		emit_seq(c, inst);
		break;
	    case OPCODE_SNE:
		emit_sne(c, inst);
		break;
	    case OPCODE_MUL:
		emit_mul(c, inst);
		break;
	    case OPCODE_POW:
		emit_pow(c, inst);
		break;
	    case OPCODE_MAD:
		emit_mad(c, inst);
		break;
	    case OPCODE_TEX:
		emit_tex(c, inst);
		break;
	    case OPCODE_TXB:
		emit_txb(c, inst);
		break;
	    case OPCODE_KIL_NV:
		emit_kil(c);
		break;
	    case OPCODE_IF:
		assert(if_insn < MAX_IFSN);
		if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
		break;
	    case OPCODE_ELSE:
		if_inst[if_insn-1]  = brw_ELSE(p, if_inst[if_insn-1]);
		break;
	    case OPCODE_ENDIF:
		assert(if_insn > 0);
		brw_ENDIF(p, if_inst[--if_insn]);
		break;
	    case OPCODE_BGNSUB:
	    case OPCODE_ENDSUB:
		break;
	    case OPCODE_CAL: 
		brw_push_insn_state(p);
		brw_set_mask_control(p, BRW_MASK_DISABLE);
                brw_set_access_mode(p, BRW_ALIGN_1);
                brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
                brw_set_access_mode(p, BRW_ALIGN_16);
                brw_ADD(p, get_addr_reg(stack_index),
                         get_addr_reg(stack_index), brw_imm_d(4));
                orig_inst = inst->Data;
                orig_inst->Data = &p->store[p->nr_insn];
                brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
                brw_pop_insn_state(p);
		break;

	    case OPCODE_RET:
		brw_push_insn_state(p);
		brw_set_mask_control(p, BRW_MASK_DISABLE);
                brw_ADD(p, get_addr_reg(stack_index),
                        get_addr_reg(stack_index), brw_imm_d(-4));
                brw_set_access_mode(p, BRW_ALIGN_1);
                brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
                brw_set_access_mode(p, BRW_ALIGN_16);
		brw_pop_insn_state(p);

		break;
	    case OPCODE_BGNLOOP:
		loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
		break;
	    case OPCODE_BRK:
		brw_BREAK(p);
		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
		break;
	    case OPCODE_CONT:
		brw_CONT(p);
		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
		break;
	    case OPCODE_ENDLOOP: 
		loop_insn--;
		inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
		/* patch all the BREAK instructions from
		   last BEGINLOOP */
		while (inst0 > loop_inst[loop_insn]) {
		    inst0--;
		    if (inst0->header.opcode == BRW_OPCODE_BREAK) {
			inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
			inst0->bits3.if_else.pop_count = 0;
		    } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
                        inst0->bits3.if_else.jump_count = inst1 - inst0;
                        inst0->bits3.if_else.pop_count = 0;
                    }
		}
		break;
	    default:
		_mesa_printf("unsupported IR in fragment shader %d\n",
			inst->Opcode);
	}
	if (inst->CondUpdate)
	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
	else
	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
    post_wm_emit(c);
    for (i = 0; i < c->fp->program.Base.NumInstructions; i++)
	c->fp->program.Base.Instructions[i].Data = NULL;
}