static void emit_min(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg src0, src1, dst;
    int i;
    brw_push_insn_state(p);
    for (i = 0; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_MOV(p, dst, src0);
	    brw_set_saturate(p, 0);

	    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
	    brw_MOV(p, dst, src1);
	    brw_set_saturate(p, 0);
	    brw_set_predicate_control_flag_value(p, 0xff);
	}
    }
    brw_pop_insn_state(p);
}
/**
 * Emit the Y and Z results of LIT.
 *
 * Y receives arg0[0]; Z receives POW(arg0[1], arg0[3]) via emit_math2().
 * Afterwards both are overwritten with 0.0 by MOVs that follow a
 * "arg0[0] <= 0" compare.  X and W must not be present in \p mask.
 *
 * \param p     instruction emitter
 * \param dst   per-channel destination registers (dst[1], dst[2] are used)
 * \param mask  write mask, optionally OR'ed with SATURATE
 * \param arg0  per-channel source registers
 */
static void emit_lit( struct brw_compile *p, 
		      const struct brw_reg *dst,
		      GLuint mask,
		      const struct brw_reg *arg0 )
{
   const GLuint sat_bit = mask & SATURATE;
   const int want_y = (mask & WRITEMASK_Y) != 0;
   const int want_z = (mask & WRITEMASK_Z) != 0;

   assert((mask & WRITEMASK_XW) == 0);

   if (want_y) {
      brw_set_saturate(p, sat_bit ? 1 : 0);
      brw_MOV(p, dst[1], arg0[0]);
      brw_set_saturate(p, 0);
   }

   if (want_z) {
      emit_math2(p, BRW_MATH_FUNCTION_POW,
		 &dst[2],
		 WRITEMASK_X | sat_bit,
		 &arg0[1],
		 &arg0[3]);
   }

   /* An if statement that skips the work above would be the natural way
    * to do this, but 16-wide if statements seem to lock up c1 hardware.
    * Instead, compute everything and then conditionally zero the results.
    * NOTE(review): the MOVs below are presumably predicated on the flag
    * written by this compare -- confirm against brw_CMP().
    */
   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));

   if (want_y)
      brw_MOV(p, dst[1], brw_imm_f(0));

   if (want_z)
      brw_MOV(p, dst[2], brw_imm_f(0));

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
static void emit_ddy(struct brw_wm_compile *c,
                struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg interp[4];
    struct brw_reg dst;
    struct brw_reg src0, w;
    GLuint nr, i;

    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
    nr = src0.nr;
    w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
    interp[0] = brw_vec1_grf(nr, 0);
    interp[1] = brw_vec1_grf(nr, 4);
    interp[2] = brw_vec1_grf(nr+1, 0);
    interp[3] = brw_vec1_grf(nr+1, 4);
    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
    for(i = 0; i < 4; i++ ) {
        if (mask & (1<<i)) {
            dst = get_dst_reg(c, inst, i, 1);
            brw_MOV(p, dst, suboffset(interp[i], 1));
            brw_MUL(p, dst, dst, w);
        }
    }
    brw_set_saturate(p, 0);
}
Beispiel #4
0
/* OPCODE_DDX / OPCODE_DDY.  For each output channel the input is laid out
 * as two subspans of four pixels:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and the result we want is:
 *
 *           DDX                     DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * with two more subspans appended in 16-pixel dispatch mode.
 *
 * DDX is easy: width = 2 with horizontal stride 0 repeats one value across
 * each pair, and vertical stride 2 steps to the next pair.  DDY is harder
 * because the pairs would have to be swizzled between each other.  It could
 * probably be done like DDX with a swizzle afterwards, but for now this
 * simply produces
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 *
 * negate_value flips the sign of the result.  It is used for d/dy on FBOs,
 * whose origin is the upper left rather than the lower left.
 */
void emit_ddxy(struct brw_compile *p,
	       const struct brw_reg *dst,
	       GLuint mask,
	       bool is_ddx,
	       const struct brw_reg *arg0,
               bool negate_value)
{
   const bool saturate = (mask & SATURATE) != 0;
   GLuint vstride, width, subnr0, subnr1;
   int chan;

   /* The two source regions differ only in their starting element and in
    * the region shape, both of which depend solely on the derivative
    * direction.
    */
   if (is_ddx) {
      vstride = BRW_VERTICAL_STRIDE_2;
      width = BRW_WIDTH_2;
      subnr0 = 1;
      subnr1 = 0;
   } else {
      vstride = BRW_VERTICAL_STRIDE_4;
      width = BRW_WIDTH_4;
      subnr0 = 0;
      subnr1 = 2;
   }

   if (saturate)
      brw_set_saturate(p, 1);

   for (chan = 0; chan < 4; chan++) {
      struct brw_reg src0, src1;

      if (!(mask & (1 << chan)))
	 continue;

      src0 = brw_reg(arg0[chan].file, arg0[chan].nr, subnr0,
		     BRW_REGISTER_TYPE_F,
		     vstride,
		     width,
		     BRW_HORIZONTAL_STRIDE_0,
		     BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
      src1 = brw_reg(arg0[chan].file, arg0[chan].nr, subnr1,
		     BRW_REGISTER_TYPE_F,
		     vstride,
		     width,
		     BRW_HORIZONTAL_STRIDE_0,
		     BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);

      if (negate_value)
	 brw_ADD(p, dst[chan], src1, negate(src0));
      else
	 brw_ADD(p, dst[chan], src0, negate(src1));
   }

   if (saturate)
      brw_set_saturate(p, 0);
}
Beispiel #5
0
/**
 * Prepare a brw_compile for emitting instructions.
 *
 * Allocates the instruction store and the if/loop bookkeeping arrays from
 * \p mem_ctx, clears the first entry of the instruction-state stack and
 * installs the default emission state.
 *
 * \param brw      owning context, stored in p->brw
 * \param p        compile state to initialise
 * \param mem_ctx  allocation context passed to rzalloc_array()
 */
void
brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx)
{
   /* Starting capacities.  The instruction store begins at 1024 entries;
    * per the original note, brw_next_insn() doubles it whenever it proves
    * too small, until memory runs out.
    */
   const int initial_store_size = 1024;
   const int initial_stack_size = 16;

   p->brw = brw;
   p->mem_ctx = mem_ctx;

   p->store_size = initial_store_size;
   p->store = rzalloc_array(mem_ctx, struct brw_instruction, p->store_size);
   p->nr_insn = 0;
   p->compressed = false;

   /* Start at the bottom of the state stack with an all-zero state. */
   p->current = p->stack;
   memset(p->current, 0, sizeof(*p->current));

   /* Some defaults? */
   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
   brw_set_saturate(p, 0);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_predicate_control_flag_value(p, 0xff); 

   /* Control-flow stacks: one for IFs ... */
   p->if_stack_depth = 0;
   p->if_stack_array_size = initial_stack_size;
   p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);

   /* ... and one for loops, with a parallel array of the same size. */
   p->loop_stack_depth = 0;
   p->loop_stack_array_size = initial_stack_size;
   p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
   p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
}
/**
 * Emit code for ABS: dst = |src0| on each channel enabled in the write
 * mask, saturating the result when the instruction requests it.
 *
 * \param c     WM compile context; code is emitted into c->func
 * \param inst  the ABS instruction being translated
 */
static void emit_abs( struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    int chan;

    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
    for (chan = 0; chan < 4; chan++) {
	struct brw_reg dst, src;

	if (!(inst->DstReg.WriteMask & (1 << chan)))
	    continue;

	dst = get_dst_reg(c, inst, chan, 1);
	src = get_src_reg(c, &inst->SrcReg[0], chan, 1);
	brw_MOV(p, dst, brw_abs(src));
    }
    brw_set_saturate(p, 0);
}
Beispiel #7
0
/**
 * Emit a two-operand math instruction on hardware that takes the second
 * operand through a message register.
 *
 * One operand is first copied into message register base_mrf + 1 (with
 * saturation and predication switched off for that copy); the other is
 * handed to brw_math() directly.
 */
void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
    /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
     * "Message Payload":
     *
     * "Operand0[7].  For the INT DIV functions, this operand is the
     *  denominator."
     *  ...
     * "Operand1[7].  For the INT DIV functions, this operand is the
     *  numerator."
     *
     * So everything except POW gets its operands swapped.
     */
    const bool swap_operands = (inst->opcode != SHADER_OPCODE_POW);
    const struct brw_reg direct_operand = swap_operands ? src1 : src0;
    const struct brw_reg payload_operand = swap_operands ? src0 : src1;

    /* The payload copy must be a plain, unconditional move. */
    brw_push_insn_state(p);
    brw_set_saturate(p, false);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_MOV(p,
            retype(brw_message_reg(inst->base_mrf + 1), payload_operand.type),
            payload_operand);
    brw_pop_insn_state(p);

    brw_math(p,
             dst,
             brw_math_function(inst->opcode),
             inst->base_mrf,
             direct_operand,
             BRW_MATH_DATA_VECTOR,
             BRW_MATH_PRECISION_FULL);
}
static void emit_flr(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    struct brw_reg src0, dst;
    GLuint mask = inst->DstReg.WriteMask;
    int i;
    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
    for (i = 0 ; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    brw_RNDD(p, dst, src0);
	}
    }
    brw_set_saturate(p, 0);
}
/**
 * Emit a three-component dot product of arg0 and arg1 into dst[0].
 *
 * Nothing is emitted when no channel is written; otherwise only X may be
 * present in \p mask.  The first two products go to the null register and
 * only the final MAC writes dst[0], saturated if SATURATE is set.
 * NOTE(review): this relies on MAC adding to the result left behind by the
 * preceding MUL/MAC -- confirm against the hardware documentation.
 *
 * \param p     instruction emitter
 * \param dst   per-channel destination registers
 * \param mask  write mask, optionally OR'ed with SATURATE
 * \param arg0  first operand, per channel
 * \param arg1  second operand, per channel
 */
static void emit_dp3( struct brw_compile *p, 
		      const struct brw_reg *dst,
		      GLuint mask,
		      const struct brw_reg *arg0,
		      const struct brw_reg *arg1 )
{
   const GLuint written = mask & WRITEMASK_XYZW;

   if (written == 0)
      return; /* Do not emit dead code*/

   assert(written == WRITEMASK_X);

   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);

   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
   brw_MAC(p, dst[0], arg0[2], arg1[2]);
   brw_set_saturate(p, 0);
}
/**
 * Emit code for DPH (homogeneous dot product):
 *
 *     dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w
 *
 * The three products are accumulated with MUL/MAC/MAC and land in dst;
 * the final ADD then adds src1.w to that sum.  The ADD must read dst as
 * its first operand: adding src0.w to src1.w instead would throw the dot
 * product away.  Saturation, if requested, is applied only to that last
 * instruction.
 *
 * NOTE(review): dst is written by the last MAC before src1.w is read, so a
 * dst that shares a register with src1.w would still give a wrong result --
 * confirm whether callers can produce that overlap.
 *
 * \param c     WM compile context; code is emitted into c->func
 * \param inst  the DPH instruction being translated
 */
static void emit_dph(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_reg src0[4], src1[4], dst;
    int i;
    struct brw_compile *p = &c->func;

    /* All four components of both operands are fetched, as before, even
     * though src0[3] does not take part in the arithmetic.
     */
    for (i = 0; i < 4; i++) {
	src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
	src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
    }
    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);

    brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
    brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
    brw_MAC(p, dst, src0[2], src1[2]);

    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
    brw_ADD(p, dst, dst, src1[3]);
    brw_set_saturate(p, 0);
}
Beispiel #11
0
/**
 * Emit MAD: dst = arg0 * arg1 + arg2 for each channel enabled in \p mask.
 *
 * The product is written to dst first and the addend is added in place;
 * only the ADD is saturated when SATURATE is set.
 *
 * \param p     instruction emitter
 * \param dst   per-channel destination registers
 * \param mask  write mask, optionally OR'ed with SATURATE
 * \param arg0  multiplicand, per channel
 * \param arg1  multiplier, per channel
 * \param arg2  addend, per channel
 */
void emit_mad(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1,
	      const struct brw_reg *arg2)
{
   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
   GLuint chan;

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1 << chan)))
	 continue;

      brw_MUL(p, dst[chan], arg0[chan], arg1[chan]);

      brw_set_saturate(p, saturate);
      brw_ADD(p, dst[chan], dst[chan], arg2[chan]);
      brw_set_saturate(p, 0);
   }
}
Beispiel #12
0
/**
 * Emit MIN for each channel enabled in \p mask: compare arg0 < arg1, then
 * SEL between the two operands into dst.  Only the SEL is saturated when
 * SATURATE is set; the predicate flag value is reset to 0xff afterwards.
 *
 * \param p     instruction emitter
 * \param dst   per-channel destination registers
 * \param mask  write mask, optionally OR'ed with SATURATE
 * \param arg0  first operand, per channel
 * \param arg1  second operand, per channel
 */
void emit_min(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1)
{
   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
   GLuint chan;

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1 << chan)))
	 continue;

      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], arg1[chan]);

      brw_set_saturate(p, saturate);
      brw_SEL(p, dst[chan], arg0[chan], arg1[chan]);
      brw_set_saturate(p, 0);
      brw_set_predicate_control_flag_value(p, 0xff);
   }
}
Beispiel #13
0
/**
 * Emit a two-component dot product of arg0 and arg1.
 *
 * Exactly one of X/Y/Z/W may be set in \p mask; the result goes to that
 * channel of dst.  Nothing is emitted when no channel is written.
 * NOTE(review): relies on MAC adding to the result of the preceding
 * null-destination MUL -- confirm against the hardware documentation.
 *
 * \param p     instruction emitter
 * \param dst   per-channel destination registers
 * \param mask  write mask, optionally OR'ed with SATURATE
 * \param arg0  first operand, per channel
 * \param arg1  second operand, per channel
 */
void emit_dp2(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1)
{
   const GLuint written = mask & WRITEMASK_XYZW;
   int dst_chan;

   if (written == 0)
      return; /* Do not emit dead code */

   assert(is_power_of_two(written));

   /* Index of the single channel being written. */
   dst_chan = _mesa_ffs(written) - 1;

   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);

   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
   brw_MAC(p, dst[dst_chan], arg0[1], arg1[1]);
   brw_set_saturate(p, 0);
}
Beispiel #14
0
/**
 * Reset a brw_compile so that instruction emission can start.
 *
 * The instruction count is zeroed, the state stack is rewound to its first
 * entry (which is cleared), and the default emission state is installed.
 *
 * \param p  compile state to initialise
 */
void brw_init_compile( struct brw_compile *p )
{
   p->nr_insn = 0;

   /* Bottom of the state stack, all fields zero. */
   p->current = p->stack;
   memset(p->current, 0, sizeof(*p->current));

   /* Some defaults? */
   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
   brw_set_saturate(p, 0);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_predicate_control_flag_value(p, 0xff); 
}
static void emit_mad(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg dst, src0, src1, src2;
    int i;

    for (i = 0; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
	    src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
	    brw_MUL(p, dst, src0, src1);

	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_ADD(p, dst, dst, src2);
	    brw_set_saturate(p, 0);
	}
    }
}
Beispiel #16
0
/**
 * Apply a one-source instruction emitter to every channel enabled in
 * \p mask.
 *
 * Saturation is switched on around the whole loop when SATURATE is set in
 * \p mask, and left untouched otherwise.
 *
 * \param p     instruction emitter
 * \param func  emitter to call as func(p, dst[i], arg0[i])
 * \param dst   per-channel destination registers
 * \param mask  write mask, optionally OR'ed with SATURATE
 * \param arg0  per-channel source registers
 */
void emit_alu1(struct brw_compile *p,
	       struct brw_instruction *(*func)(struct brw_compile *,
					       struct brw_reg,
					       struct brw_reg),
	       const struct brw_reg *dst,
	       GLuint mask,
	       const struct brw_reg *arg0)
{
   const int saturate = (mask & SATURATE) != 0;
   GLuint chan;

   if (saturate)
      brw_set_saturate(p, 1);

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1 << chan)))
	 continue;

      func(p, dst[chan], arg0[chan]);
   }

   if (saturate)
      brw_set_saturate(p, 0);
}
Beispiel #17
0
/**
 * Emit a cross product of arg0 and arg1 for the X, Y and Z channels
 * enabled in \p mask.  W must not be written.
 *
 * For channel i, with a = (i+1)%3 and b = (i+2)%3, the emitted pair is
 *     MUL null,   -arg0[b], arg1[a]
 *     MAC dst[i],  arg0[a], arg1[b]     (saturated if SATURATE is set)
 * NOTE(review): relies on MAC adding to the result of the preceding
 * null-destination MUL -- confirm against the hardware documentation.
 *
 * \param p     instruction emitter
 * \param dst   per-channel destination registers
 * \param mask  write mask, optionally OR'ed with SATURATE
 * \param arg0  first operand, per channel
 * \param arg1  second operand, per channel
 */
void emit_xpd(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1)
{
   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
   GLuint chan;

   assert((mask & WRITEMASK_W) != WRITEMASK_W);

   for (chan = 0; chan < 3; chan++) {
      GLuint next, prev;

      if (!(mask & (1 << chan)))
	 continue;

      next = (chan + 1) % 3;
      prev = (chan + 2) % 3;

      brw_MUL(p, brw_null_reg(), negate(arg0[prev]), arg1[next]);

      brw_set_saturate(p, saturate);
      brw_MAC(p, dst[chan], arg0[next], arg1[prev]);
      brw_set_saturate(p, 0);
   }
}
Beispiel #18
0
/**
 * Emit LRP for each channel enabled in \p mask.
 *
 * dst doubles as scratch space: it first receives (1 - arg0), that value
 * is multiplied by arg2 into the null register, and a final MAC of
 * arg0 * arg1 writes dst (saturated if SATURATE is set).
 * NOTE(review): relies on MAC adding to the result of the preceding
 * null-destination MUL -- confirm against the hardware documentation.
 *
 * \param p     instruction emitter
 * \param dst   per-channel destination registers
 * \param mask  write mask, optionally OR'ed with SATURATE
 * \param arg0  interpolation weight, per channel
 * \param arg1  value weighted by arg0, per channel
 * \param arg2  value weighted by (1 - arg0), per channel
 */
void emit_lrp(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1,
	      const struct brw_reg *arg2)
{
   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
   GLuint chan;

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1 << chan)))
	 continue;

      /* Can I use the LINE instruction for this? */
      brw_ADD(p, dst[chan], negate(arg0[chan]), brw_imm_f(1.0));
      brw_MUL(p, brw_null_reg(), dst[chan], arg2[chan]);

      brw_set_saturate(p, saturate);
      brw_MAC(p, dst[chan], arg0[chan], arg1[chan]);
      brw_set_saturate(p, 0);
   }
}
/**
 * Emit code for XPD (cross product) on the X, Y and Z channels enabled in
 * the write mask.
 *
 * For channel i, with i1 = (i+1)%3 and i2 = (i+2)%3, the emitted pair is
 *     MUL null, -src0[i2], src1[i1]
 *     MAC dst,   src0[i1], src1[i2]     (saturated if requested)
 *
 * A cross product only has three components, so the loop stops after Z.
 * Running it for W as well (as this function used to) emitted a MUL/MAC
 * pair for a meaningless combination of components.
 *
 * NOTE(review): each channel's dst is written before later channels read
 * their sources, so a dst that shares registers with a source operand
 * would feed already-overwritten values into the later channels -- confirm
 * whether callers can produce that overlap.
 *
 * \param c     WM compile context; code is emitted into c->func
 * \param inst  the XPD instruction being translated
 */
static void emit_xpd(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    int i;
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    for (i = 0; i < 3; i++) {
	struct brw_reg src0, src1, dst;
	GLuint i2 = (i+2)%3;
	GLuint i1 = (i+1)%3;

	if (!(mask & (1<<i)))
	    continue;

	dst = get_dst_reg(c, inst, i, 1);

	src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
	src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
	brw_MUL(p, brw_null_reg(), src0, src1);

	src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
	src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
	brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
	brw_MAC(p, dst, src0, src1);
	brw_set_saturate(p, 0);
    }
    /* Leave saturation off even when no channel was emitted. */
    brw_set_saturate(p, 0);
}
static void emit_lrp(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
    int i;
    for (i = 0; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);

	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);

	    if (src1.nr == dst.nr) {
		tmp1 = alloc_tmp(c);
		brw_MOV(p, tmp1, src1);
	    } else
		tmp1 = src1;

	    src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
	    if (src2.nr == dst.nr) {
		tmp2 = alloc_tmp(c);
		brw_MOV(p, tmp2, src2);
	    } else
		tmp2 = src2;

	    brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
	    brw_MUL(p, brw_null_reg(), dst, tmp2);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_MAC(p, dst, src0, tmp1);
	    brw_set_saturate(p, 0);
	}
	release_tmps(c);
    }
}
Beispiel #21
0
/**
 * Prepare a brw_compile for emitting instructions into caller-provided
 * storage.
 *
 * \param p      compile state to initialise
 * \param gen    hardware generation; must be non-zero
 * \param store  instruction store, recorded in p->store as given
 */
void brw_compile_init(struct brw_compile *p, int gen, void *store)
{
	assert(gen);

	p->gen = gen;
	p->store = store;
	p->nr_insn = 0;
	p->compressed = false;

	/* Rewind the state stack to its first entry and clear it. */
	p->current = p->stack;
	memset(p->current, 0, sizeof(*p->current));

	/* Some defaults? */
	brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
	brw_set_saturate(p, 0);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_set_predicate_control_flag_value(p, 0xff);

	/* The IF stack starts out empty and unallocated. */
	p->if_stack = NULL;
	p->if_stack_array_size = 0;
	p->if_stack_depth = 0;
}
Beispiel #22
0
/**
 * Emit a two-operand math function (e.g. POW) of arg0[0] and arg1[0] into
 * the single destination channel selected by \p mask.
 *
 * Nothing is emitted when no channel is written; otherwise exactly one of
 * X/Y/Z/W must be set.  Two code paths exist:
 *  - gen >= 6: brw_math2() takes both operands as registers.
 *  - older:    the second operand is copied into message registers and
 *              brw_math() is called with the first operand.
 * In both paths the work is issued once uncompressed and, when
 * dispatch_width is 16, a second time for the second half.
 *
 * \param c         WM compile context; code is emitted into c->func
 * \param function  math function selector passed through to the emitter
 * \param dst       per-channel destination registers
 * \param mask      write mask, optionally OR'ed with SATURATE
 * \param arg0      first operand (element 0 is used)
 * \param arg1      second operand (element 0 is used)
 */
void emit_math2(struct brw_wm_compile *c,
		GLuint function,
		const struct brw_reg *dst,
		GLuint mask,
		const struct brw_reg *arg0,
		const struct brw_reg *arg1)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   /* Index of the lowest channel set in the mask (-1 if none; unused then). */
   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;

   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code */

   assert(is_power_of_two(mask & WRITEMASK_XYZW));

   /* Compression and saturate settings changed below are undone by the
    * matching pop at the end.
    */
   brw_push_insn_state(p);

   /* math can only operate on up to a vec8 at a time, so in
    * dispatch_width==16 we have to do the second half manually.
    */
   if (intel->gen >= 6) {
      struct brw_reg src0 = arg0[0];
      struct brw_reg src1 = arg1[0];
      struct brw_reg temp_dst = dst[dst_chan];

      /* A horizontal-stride-0 first operand is copied into the destination
       * register and read from there instead.
       */
      if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
	 brw_MOV(p, temp_dst, src0);
	 src0 = temp_dst;
      }

      if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
	 /* This is a heinous hack to get a temporary register for use
	  * in case both arg0 and arg1 are constants.  Why you're
	  * doing exponentiation on constant values in the shader, we
	  * don't know.
	  *
	  * max_wm_grf is almost surely less than the maximum GRF, and
	  * gen6 doesn't care about the number of GRFs used in a
	  * shader like pre-gen6 did.
	  */
	 struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
	 brw_MOV(p, temp, src1);
	 src1 = temp;
      }

      /* Saturation is enabled only now, after the operand copies above. */
      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math2(p,
		temp_dst,
		function,
		src0,
		src1);
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_math2(p,
		   sechalf(temp_dst),
		   function,
		   sechalf(src0),
		   sechalf(src1));
      }
   } else {
      /* Here saturation is passed to brw_math() as an argument rather than
       * set through the instruction state.
       */
      GLuint saturate = ((mask & SATURATE) ?
			 BRW_MATH_SATURATE_SATURATE :
			 BRW_MATH_SATURATE_NONE);

      /* Second operand goes into message register 3 (and 5 for the second
       * half in 16-wide dispatch).
       */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, brw_message_reg(3), arg1[0]);
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
      }

      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math(p,
	       dst[dst_chan],
	       function,
	       saturate,
	       2,
	       arg0[0],
	       BRW_MATH_DATA_VECTOR,
	       BRW_MATH_PRECISION_FULL);

      /* Send two messages to perform all 16 operations:
       */
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_math(p,
		  offset(dst[dst_chan],1),
		  function,
		  saturate,
		  4,
		  sechalf(arg0[0]),
		  BRW_MATH_DATA_VECTOR,
		  BRW_MATH_PRECISION_FULL);
      }
   }
   brw_pop_insn_state(p);
}
Beispiel #23
0
void
vec4_generator::generate_code(exec_list *instructions)
{
   int last_native_insn_offset = 0;
   const char *last_annotation_string = NULL;
   const void *last_annotation_ir = NULL;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      if (shader) {
         printf("Native code for vertex shader %d:\n", prog->Name);
      } else {
         printf("Native code for vertex program %d:\n", c->vp->program.Base.Id);
      }
   }

   foreach_list(node, instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf("   ");
               if (shader) {
                  ((ir_instruction *) last_annotation_ir)->print();
               } else {
                  const prog_instruction *vpi;
                  vpi = (const prog_instruction *) inst->ir;
                  printf("%d: ", (int)(vpi - vp->Base.Instructions));
                  _mesa_fprint_instruction_opt(stdout, vpi, 0,
                                               PROG_PRINT_DEBUG, NULL);
               }
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
      }

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MACH:
	 brw_set_acc_write_control(p, 1);
	 brw_MACH(p, dst, src[0], src[1]);
	 brw_set_acc_write_control(p, 0);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DPH:
	 brw_DPH(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP4:
	 brw_DP4(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP3:
	 brw_DP3(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP2:
	 brw_DP2(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 if (inst->src[0].file != BAD_FILE) {
	    /* The instruction has an embedded compare (only allowed on gen6) */
	    assert(intel->gen == 6);
	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
	    brw_inst->header.predicate_control = inst->predicate;
	 }
	 break;

      case BRW_OPCODE_ELSE:
	 brw_ELSE(p);
	 break;
      case BRW_OPCODE_ENDIF:
	 brw_ENDIF(p);
	 break;

      case BRW_OPCODE_DO:
	 brw_DO(p, BRW_EXECUTE_8);
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* FINISHME: We need to write the loop instruction support still. */
	 if (intel->gen >= 6)
	    gen6_CONT(p);
	 else
	    brw_CONT(p);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE:
	 brw_WHILE(p);
	 break;

      default:
	 generate_vs_instruction(inst, dst, src);
	 break;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 brw_dump_compile(p, stdout,
			  last_native_insn_offset, p->next_insn_offset);
      }

      last_native_insn_offset = p->next_insn_offset;
   }
Beispiel #24
0
/* Post-fragment-program processing.  Send the results to the
 * framebuffer.
 *
 * The message payload is assembled in message registers starting at m2
 * (m3 when a slot is reserved for AA): the four color channels first,
 * then optionally source depth and destination depth, each advancing the
 * running register index `nr`.  The write itself is issued through
 * fire_fb_write().
 *
 * \param c     WM compile context; code is emitted into c->func
 * \param arg0  the fragment color
 * \param arg1  the pass-through depth value
 * \param arg2  the shader-computed depth value
 * \param target  passed through to fire_fb_write(); on gen6+ a non-zero
 *                value is also written into the message header
 * \param eot   passed through to fire_fb_write()
 */
void emit_fb_write(struct brw_wm_compile *c,
		   struct brw_reg *arg0,
		   struct brw_reg *arg1,
		   struct brw_reg *arg2,
		   GLuint target,
		   GLuint eot)
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Next free message register for the payload. */
   GLuint nr = 2;
   GLuint channel;

   /* Reserve a space for AA - may not be needed:
    */
   if (c->aa_dest_stencil_reg)
      nr += 1;

   /* I don't really understand how this achieves the color interleave
    * (ie RGBARGBA) in the result:  [Do the saturation here]
    */
   brw_push_insn_state(p);

   /* Clamp the colors while copying them if the key asks for it. */
   if (c->key.clamp_fragment_color)
      brw_set_saturate(p, 1);

   for (channel = 0; channel < 4; channel++) {
      if (intel->gen >= 6) {
	 /* gen6 SIMD16 single source DP write looks like:
	  * m + 0: r0
	  * m + 1: r1
	  * m + 2: g0
	  * m + 3: g1
	  * m + 4: b0
	  * m + 5: b1
	  * m + 6: a0
	  * m + 7: a1
	  */
	 if (c->dispatch_width == 16) {
	    brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]);
	 } else {
	    brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]);
	 }
      } else if (c->dispatch_width == 16 && brw->has_compr4) {
	 /* pre-gen6 SIMD16 single source DP write looks like:
	  * m + 0: r0
	  * m + 1: g0
	  * m + 2: b0
	  * m + 3: a0
	  * m + 4: r1
	  * m + 5: g1
	  * m + 6: b1
	  * m + 7: a1
	  *
	  * By setting the high bit of the MRF register number, we indicate
	  * that we want COMPR4 mode - instead of doing the usual destination
	  * + 1 for the second half we get destination + 4.
	  */
	 brw_MOV(p,
		 brw_message_reg(nr + channel + BRW_MRF_COMPR4),
		 arg0[channel]);
      } else {
	 /* No COMPR4: copy each half with its own uncompressed MOV. */
	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_MOV(p,
		 brw_message_reg(nr + channel),
		 arg0[channel]);

	 if (c->dispatch_width == 16) {
	    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	    brw_MOV(p,
		    brw_message_reg(nr + channel + 4),
		    sechalf(arg0[channel]));
	 }
      }
   }

   brw_set_saturate(p, 0);

   /* skip over the regs populated above:
    */
   if (c->dispatch_width == 16)
      nr += 8;
   else
      nr += 4;

   brw_pop_insn_state(p);

   /* Source depth: the shader-computed value if there is one, otherwise
    * the pass-through value.
    */
   if (c->source_depth_to_render_target)
   {
      if (c->computes_depth)
	 brw_MOV(p, brw_message_reg(nr), arg2[2]);
      else 
	 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */

      nr += 2;
   }

   /* Destination depth: dest_depth_reg is split into an index into arg1
    * (comp) and an odd/even flag (off).
    */
   if (c->dest_depth_reg)
   {
      GLuint comp = c->dest_depth_reg / 2;
      GLuint off = c->dest_depth_reg % 2;

      if (off != 0) {
         /* Odd case: two uncompressed MOVs, taken from the register after
          * arg1[comp] and from arg1[comp+1].
          */
         brw_push_insn_state(p);
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
         /* 2nd half? */
         brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
         brw_pop_insn_state(p);
      }
      else {
         brw_MOV(p, brw_message_reg(nr), arg1[comp]);
      }
      nr += 2;
   }

   if (intel->gen >= 6) {
      /* Load the message header.  There's no implied move from src0
       * to the base mrf on gen6.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);

      /* A non-zero target is stored in element 2 of m0. */
      if (target != 0) {
	 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
					0,
					2), BRW_REGISTER_TYPE_UD),
		 brw_imm_ud(target));
      }
   }

   if (!c->runtime_check_aads_emit) {
      /* Whether AA data is needed is known at compile time. */
      if (c->aa_dest_stencil_reg)
	 emit_aa(c, arg1, 2);

      fire_fb_write(c, 0, nr, target, eot);
   }
   else {
      /* Decide at run time: test bit 26 of element 6 of r1 and jump over
       * the AA variant of the write depending on the result.
       */
      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      struct brw_reg ip = brw_ip_reg();
      struct brw_instruction *jmp;
      
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
      brw_AND(p, 
	      v1_null_ud, 
	      get_element_ud(brw_vec8_grf(1,0), 6), 
	      brw_imm_ud(1<<26)); 

      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
      {
	 emit_aa(c, arg1, 2);
	 fire_fb_write(c, 0, nr, target, eot);
	 /* note - thread killed in subroutine */
      }
      brw_land_fwd_jump(p, jmp);

      /* ELSE: Shuffle up one register to fill in the hole left for AA:
       */
      fire_fb_write(c, 1, nr-1, target, eot);
   }
}
Beispiel #25
0
/**
 * Emit a texture sample.
 *
 * The texture coordinates (and, for shadow lookups, the reference value
 * arg[2]) are copied into consecutive message registers starting at m2,
 * one register per value in 8-wide dispatch and two in 16-wide dispatch,
 * and a single brw_SAMPLE() is then issued.
 *
 * \param c              WM compile context; code is emitted into c->func
 * \param dst            destination registers; dst[0] is where the response
 *                       is written
 * \param dst_flags      write mask (X/Y/Z/W bits are forwarded to the
 *                       sampler message)
 * \param arg            texture coordinates, one register per component
 * \param depth_payload  register passed to brw_SAMPLE() as its source
 * \param tex_idx        texture target index (TEXTURE_*_INDEX); aborts on an
 *                       unexpected value
 * \param sampler        sampler unit, also used to look up the surface index
 *                       and the per-coordinate clamp mask
 * \param shadow         true for a shadow-comparison lookup
 */
void emit_tex(struct brw_wm_compile *c,
	      struct brw_reg *dst,
	      GLuint dst_flags,
	      struct brw_reg *arg,
	      struct brw_reg depth_payload,
	      GLuint tex_idx,
	      GLuint sampler,
	      bool shadow)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg dst_retyped;
   /* cur_mrf: next message register to fill. */
   GLuint cur_mrf = 2, response_length;
   GLuint i, nr_texcoords;
   /* emit: which coordinates come from arg[]; the rest are sent as 0.0. */
   GLuint emit;
   GLuint msg_type;
   GLuint mrf_per_channel;
   GLuint simd_mode;

   /* Everything that scales with the dispatch width. */
   if (c->dispatch_width == 16) {
      mrf_per_channel = 2;
      response_length = 8;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   } else {
      mrf_per_channel = 1;
      response_length = 4;
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   }

   /* How many input regs are there?
    */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      emit = WRITEMASK_X;
      nr_texcoords = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_1D_ARRAY_INDEX:
   case TEXTURE_RECT_INDEX:
      emit = WRITEMASK_XY;
      nr_texcoords = 2;
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_2D_ARRAY_INDEX:
   case TEXTURE_CUBE_INDEX:
      emit = WRITEMASK_XYZ;
      nr_texcoords = 3;
      break;
   default:
      /* unexpected target */
      abort();
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5 && c->dispatch_width == 8)
      nr_texcoords = 3;

   if (shadow) {
      if (intel->gen < 7) {
	 /* For shadow comparisons, we have to supply u,v,r. */
	 nr_texcoords = 3;
      } else {
	 /* On Ivybridge, the shadow comparator comes first. Just load it. */
	 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
	 cur_mrf += mrf_per_channel;
      }
   }

   /* Emit the texcoords. */
   for (i = 0; i < nr_texcoords; i++) {
      /* Saturate this coordinate if the key's clamp mask has the bit for
       * this sampler set.
       */
      if (c->key.tex.gl_clamp_mask[i] & (1 << sampler))
	 brw_set_saturate(p, true);

      if (emit & (1<<i))
	 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
      else
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
      cur_mrf += mrf_per_channel;

      brw_set_saturate(p, false);
   }

   /* Fill in the shadow comparison reference value. */
   if (shadow && intel->gen < 7) {
      if (intel->gen >= 5) {
	 /* Fill in the cube map array index value. */
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
	 cur_mrf += mrf_per_channel;
      } else if (c->dispatch_width == 8) {
	 /* Fill in the LOD bias value. */
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
	 cur_mrf += mrf_per_channel;
      }
      brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
      cur_mrf += mrf_per_channel;
   }

   if (intel->gen >= 5) {
      if (shadow)
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
      else
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
   } else {
      /* Note that G45 and older determines shadow compare and dispatch width
       * from message length for most messages.
       */
      if (c->dispatch_width == 16 && shadow)
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
      else
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
   }

   /* cur_mrf - 1 is passed as the message length: the payload occupies
    * m2 .. cur_mrf-1, and the count presumably also covers the register
    * starting at m1 (the base passed just after dst_retyped) -- confirm
    * against brw_SAMPLE().
    */
   brw_SAMPLE(p,
	      dst_retyped,
	      1,
	      retype(depth_payload, BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(sampler),
	      sampler,
	      dst_flags & WRITEMASK_XYZW,
	      msg_type,
	      response_length,
	      cur_mrf - 1,
	      1,
	      simd_mode,
	      BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
Beispiel #26
0
void
vec4_generator::generate_code(exec_list *instructions)
{
    int last_native_insn_offset = 0;
    const char *last_annotation_string = NULL;
    const void *last_annotation_ir = NULL;

    if (unlikely(debug_flag)) {
        if (shader_prog) {
            fprintf(stderr, "Native code for %s vertex shader %d:\n",
                    shader_prog->Label ? shader_prog->Label : "unnamed",
                    shader_prog->Name);
        } else {
            fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
        }
    }

    foreach_list(node, instructions) {
        vec4_instruction *inst = (vec4_instruction *)node;
        struct brw_reg src[3], dst;

        if (unlikely(debug_flag)) {
            if (last_annotation_ir != inst->ir) {
                last_annotation_ir = inst->ir;
                if (last_annotation_ir) {
                    fprintf(stderr, "   ");
                    if (shader_prog) {
                        ((ir_instruction *) last_annotation_ir)->fprint(stderr);
                    } else {
                        const prog_instruction *vpi;
                        vpi = (const prog_instruction *) inst->ir;
                        fprintf(stderr, "%d: ", (int)(vpi - prog->Instructions));
                        _mesa_fprint_instruction_opt(stderr, vpi, 0,
                                                     PROG_PRINT_DEBUG, NULL);
                    }
                    fprintf(stderr, "\n");
                }
            }
            if (last_annotation_string != inst->annotation) {
                last_annotation_string = inst->annotation;
                if (last_annotation_string)
                    fprintf(stderr, "   %s\n", last_annotation_string);
            }
        }

        for (unsigned int i = 0; i < 3; i++) {
            src[i] = inst->get_src(this->prog_data, i);
        }
        dst = inst->get_dst();

        brw_set_conditionalmod(p, inst->conditional_mod);
        brw_set_predicate_control(p, inst->predicate);
        brw_set_predicate_inverse(p, inst->predicate_inverse);
        brw_set_saturate(p, inst->saturate);
        brw_set_mask_control(p, inst->force_writemask_all);

        unsigned pre_emit_nr_insn = p->nr_insn;

        generate_vec4_instruction(inst, dst, src);

        if (inst->no_dd_clear || inst->no_dd_check) {
            assert(p->nr_insn == pre_emit_nr_insn + 1 ||
                   !"no_dd_check or no_dd_clear set for IR emitting more "
                   "than 1 instruction");

            struct brw_instruction *last = &p->store[pre_emit_nr_insn];

            if (inst->no_dd_clear)
                last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
            if (inst->no_dd_check)
                last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
        }

        if (unlikely(debug_flag)) {
            brw_dump_compile(p, stderr,
                             last_native_insn_offset, p->next_insn_offset);
        }

        last_native_insn_offset = p->next_insn_offset;
    }