static void emit_pixel_w( struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    if (mask & WRITEMASK_W) {
	struct brw_reg dst, src0, delta0, delta1;
	struct brw_reg interp3;

	dst = get_dst_reg(c, inst, 3, 1);
	src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
	delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
	delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);

	interp3 = brw_vec1_grf(src0.nr+1, 4);
	/* Calc 1/w - just linterp wpos[3] optimized by putting the
	 * result straight into a message reg.
	 */
	brw_LINE(p, brw_null_reg(), interp3, delta0);
	brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);

	/* Calc w */
	brw_math_16( p, dst,
		BRW_MATH_FUNCTION_INV,
		BRW_MATH_SATURATE_NONE,
		2, brw_null_reg(),
		BRW_MATH_PRECISION_FULL);
    }
}
示例#2
0
static void brw_wm_xy(struct brw_compile *p, int dw)
{
	struct brw_reg r1 = brw_vec1_grf(1, 0);
	struct brw_reg r1_uw = __retype_uw(r1);
	struct brw_reg x_uw, y_uw;

	brw_set_compression_control(p, BRW_COMPRESSION_NONE);

	if (dw == 16) {
		x_uw = brw_uw16_grf(30, 0);
		y_uw = brw_uw16_grf(28, 0);
	} else {
		x_uw = brw_uw8_grf(30, 0);
		y_uw = brw_uw8_grf(28, 0);
	}

	brw_ADD(p,
		x_uw,
		__stride(__suboffset(r1_uw, 4), 2, 4, 0),
		brw_imm_v(0x10101010));
	brw_ADD(p,
		y_uw,
		__stride(__suboffset(r1_uw, 5), 2, 4, 0),
		brw_imm_v(0x11001100));

	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

	brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1));
	brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
}
static void emit_pixel_xy(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_reg r1 = brw_vec1_grf(1, 0);
    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);

    struct brw_reg dst0, dst1;
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    dst0 = get_dst_reg(c, inst, 0, 1);
    dst1 = get_dst_reg(c, inst, 1, 1);
    /* Calculate pixel centers by adding 1 or 0 to each of the
     * micro-tile coordinates passed in r1.
     */
    if (mask & WRITEMASK_X) {
	brw_ADD(p,
		vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
		stride(suboffset(r1_uw, 4), 2, 4, 0),
		brw_imm_v(0x10101010));
    }

    if (mask & WRITEMASK_Y) {
	brw_ADD(p,
		vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
		stride(suboffset(r1_uw, 5), 2, 4, 0),
		brw_imm_v(0x11001100));
    }

}
示例#4
0
static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, 
					       const GLfloat *param_ptr )
{
   GLuint i = c->prog_data.nr_params++;
   
   if (i >= BRW_WM_MAX_PARAM) {
      printf("%s: out of params\n", __FUNCTION__);
      c->prog_data.error = 1;
      return NULL;
   }
   else {
      struct brw_wm_ref *ref = get_ref(c);

      c->prog_data.param[i] = param_ptr;
      c->prog_data.param_convert[i] = PARAM_NO_CONVERT;
      c->nr_creg = (i+16)/16;

      /* Push the offsets into hw_reg.  These will be added to the
       * real register numbers once one is allocated in pass2.
       */
      ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8);
      ref->value = &c->creg[i/16];
      ref->insn = 0;
      ref->prevuse = NULL;

      return ref;
   }
}
static void emit_pixel_xy(struct brw_compile *p,
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0)
{
   struct brw_reg r1 = brw_vec1_grf(1, 0);
   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
	      stride(suboffset(r1_uw, 4), 2, 4, 0),
	      brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
	      stride(suboffset(r1_uw,5), 2, 4, 0),
	      brw_imm_v(0x11001100));
   }

   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
示例#6
0
/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
void emit_frontfacing(struct brw_compile *p,
		      const struct brw_reg *dst,
		      GLuint mask)
{
   struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
   GLuint i;

   if (!(mask & WRITEMASK_XYZW))
      return;

   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
	 brw_MOV(p, dst[i], brw_imm_f(0.0));
      }
   }

   /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
    * us front face
    */
   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
	 brw_MOV(p, dst[i], brw_imm_f(1.0));
      }
   }
   brw_set_predicate_control_flag_value(p, 0xff);
}
static void emit_delta_xy(struct brw_compile *p,
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0,
			  const struct brw_reg *arg1)
{
   struct brw_reg r1 = brw_vec1_grf(1, 0);

   /* Calc delta X,Y by subtracting origin in r1 from the pixel
    * centers.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      dst[0],
	      retype(arg0[0], BRW_REGISTER_TYPE_UW),
	      negate(r1));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      dst[1],
	      retype(arg0[1], BRW_REGISTER_TYPE_UW),
	      negate(suboffset(r1,1)));

   }
}
static void emit_pixel_w( struct brw_compile *p,
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0,
			  const struct brw_reg *deltas)
{
   /* Don't need this if all you are doing is interpolating color, for
    * instance.
    */
   if (mask & WRITEMASK_W) {      
      struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);

      /* Calc 1/w - just linterp wpos[3] optimized by putting the
       * result straight into a message reg.
       */
      brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);

      /* Calc w */
      brw_math_16( p, dst[3],
		   BRW_MATH_FUNCTION_INV,
		   BRW_MATH_SATURATE_NONE,
		   2, brw_null_reg(),
		   BRW_MATH_PRECISION_FULL);
   }
}
static void emit_delta_xy(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_reg r1 = brw_vec1_grf(1, 0);
    struct brw_reg dst0, dst1, src0, src1;
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    dst0 = get_dst_reg(c, inst, 0, 1);
    dst1 = get_dst_reg(c, inst, 1, 1);
    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
    src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
    /* Calc delta X,Y by subtracting origin in r1 from the pixel
     * centers.
     */
    if (mask & WRITEMASK_X) {
	brw_ADD(p,
		dst0,
		retype(src0, BRW_REGISTER_TYPE_UW),
		negate(r1));
    }

    if (mask & WRITEMASK_Y) {
	brw_ADD(p,
		dst1,
		retype(src1, BRW_REGISTER_TYPE_UW),
		negate(suboffset(r1,1)));

    }

}
示例#10
0
static void
gen_PLN_MRF_GRF_GRF(struct brw_codegen *p)
{
   struct brw_reg m6 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0);
   struct brw_reg interp = brw_vec1_grf(2, 0);
   struct brw_reg g4 = brw_vec8_grf(4, 0);

   brw_PLN(p, m6, interp, g4);
}
示例#11
0
void emit_pixel_w(struct brw_wm_compile *c,
		  const struct brw_reg *dst,
		  GLuint mask,
		  const struct brw_reg *arg0,
		  const struct brw_reg *deltas)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src;
   struct brw_reg temp_dst;

   if (intel->gen >= 6)
	temp_dst = dst[3];
   else
	temp_dst = brw_message_reg(2);

   assert(intel->gen < 6);

   /* Don't need this if all you are doing is interpolating color, for
    * instance.
    */
   if (mask & WRITEMASK_W) {      
      struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);

      /* Calc 1/w - just linterp wpos[3] optimized by putting the
       * result straight into a message reg.
       */
      if (can_do_pln(intel, deltas)) {
	 brw_PLN(p, temp_dst, interp3, deltas[0]);
      } else {
	 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
	 brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]);
      }

      /* Calc w */
      if (intel->gen >= 6)
	 src = temp_dst;
      else
	 src = brw_null_reg();

      if (c->dispatch_width == 16) {
	 brw_math_16(p, dst[3],
		     BRW_MATH_FUNCTION_INV,
		     BRW_MATH_SATURATE_NONE,
		     2, src,
		     BRW_MATH_PRECISION_FULL);
      } else {
	 brw_math(p, dst[3],
		  BRW_MATH_FUNCTION_INV,
		  BRW_MATH_SATURATE_NONE,
		  2, src,
		  BRW_MATH_DATA_VECTOR,
		  BRW_MATH_PRECISION_FULL);
      }
   }
}
static void emit_kil(struct brw_wm_compile *c)
{
	struct brw_compile *p = &c->func;
	struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
	brw_push_insn_state(p);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
	brw_AND(p, depth, c->emit_mask_reg, depth);
	brw_pop_insn_state(p);
}
示例#13
0
void emit_cinterp(struct brw_compile *p,
		  const struct brw_reg *dst,
		  GLuint mask,
		  const struct brw_reg *arg0)
{
   struct brw_reg interp[4];
   GLuint nr = arg0[0].nr;
   GLuint i;

   interp[0] = brw_vec1_grf(nr, 0);
   interp[1] = brw_vec1_grf(nr, 4);
   interp[2] = brw_vec1_grf(nr+1, 0);
   interp[3] = brw_vec1_grf(nr+1, 4);

   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
         brw_MOV(p, dst[i], suboffset(interp[i],3));	/* TODO: optimize away like other moves */
      }
   }
}
示例#14
0
void
gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
{
   struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
   gen8_instruction *inst;

   /* Enable Channel Masks in the URB_WRITE_HWORD message header */
   default_state.access_mode = BRW_ALIGN_1;
   inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
                    BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;

   /* mlen = 2: g0 header + vertex count */
   inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true);
   gen8_set_dst(brw, inst, brw_null_reg());
   gen8_set_src0(brw, inst, src);
}
static void emit_linterp( struct brw_compile *p, 
			 const struct brw_reg *dst,
			 GLuint mask,
			 const struct brw_reg *arg0,
			 const struct brw_reg *deltas )
{
   struct brw_reg interp[4];
   GLuint nr = arg0[0].nr;
   GLuint i;

   interp[0] = brw_vec1_grf(nr, 0);
   interp[1] = brw_vec1_grf(nr, 4);
   interp[2] = brw_vec1_grf(nr+1, 0);
   interp[3] = brw_vec1_grf(nr+1, 4);

   for(i = 0; i < 4; i++ ) {
      if (mask & (1<<i)) {
	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
      }
   }
}
示例#16
0
void emit_pinterp(struct brw_compile *p,
		  const struct brw_reg *dst,
		  GLuint mask,
		  const struct brw_reg *arg0,
		  const struct brw_reg *deltas,
		  const struct brw_reg *w)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg interp[4];
   GLuint nr = arg0[0].nr;
   GLuint i;

   if (intel->gen >= 6) {
      emit_linterp(p, dst, mask, arg0, interp);
      return;
   }

   interp[0] = brw_vec1_grf(nr, 0);
   interp[1] = brw_vec1_grf(nr, 4);
   interp[2] = brw_vec1_grf(nr+1, 0);
   interp[3] = brw_vec1_grf(nr+1, 4);

   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
	 if (can_do_pln(intel, deltas)) {
	    brw_PLN(p, dst[i], interp[i], deltas[0]);
	 } else {
	    brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
	    brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
	 }
      }
   }
   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
	 brw_MUL(p, dst[i], dst[i], w[3]);
      }
   }
}
示例#17
0
static void brw_wm_affine_st(struct brw_compile *p, int dw,
			     int channel, int msg)
{
	int uv;

	if (dw == 16) {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
		uv = p->gen >= 060 ? 6 : 3;
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		uv = p->gen >= 060 ? 4 : 3;
	}
	uv += 2*channel;

	msg++;
	if (p->gen >= 060) {
		brw_PLN(p,
			brw_message_reg(msg),
			brw_vec1_grf(uv, 0),
			brw_vec8_grf(2, 0));
		msg += dw/8;

		brw_PLN(p,
			brw_message_reg(msg),
			brw_vec1_grf(uv, 4),
			brw_vec8_grf(2, 0));
	} else {
		struct brw_reg r = brw_vec1_grf(uv, 0);

		brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
		msg += dw/8;

		brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
	}
}
static void prealloc_reg(struct brw_wm_compile *c)
{
    int i, j;
    struct brw_reg reg;
    int nr_interp_regs = 0;
    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;

    for (i = 0; i < 4; i++) {
	reg = (i < c->key.nr_depth_regs) 
	    ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
	set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
    }
    c->reg_index += 2*c->key.nr_depth_regs;
    {
	int nr_params = c->fp->program.Base.Parameters->NumParameters;
	struct gl_program_parameter_list *plist = 
	    c->fp->program.Base.Parameters;
	int index = 0;
	c->prog_data.nr_params = 4*nr_params;
	for (i = 0; i < nr_params; i++) {
	    for (j = 0; j < 4; j++, index++) {
		reg = brw_vec1_grf(c->reg_index + index/8, 
			index%8);
		c->prog_data.param[index] = 
		    &plist->ParameterValues[i][j];
		set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
	    }
	}
	c->nr_creg = 2*((4*nr_params+15)/16);
	c->reg_index += c->nr_creg;
    }
    for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
	if (inputs & (1<<i)) {
	    nr_interp_regs++;
	    reg = brw_vec8_grf(c->reg_index, 0);
	    for (j = 0; j < 4; j++)
		set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
	    c->reg_index += 2;

	}
    }
    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
    c->prog_data.urb_read_length = nr_interp_regs * 2;
    c->prog_data.curb_read_length = c->nr_creg;
    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
    c->reg_index++;
    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
    c->reg_index += 2;
}
/* Kill pixel - set execution mask to zero for those pixels which
 * fail.
 */
static void emit_kil( struct brw_wm_compile *c,
		      struct brw_reg *arg0)
{
   struct brw_compile *p = &c->func;
   struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   GLuint i;
   

   /* XXX - usually won't need 4 compares!
    */
   for (i = 0; i < 4; i++) {
      brw_push_insn_state(p);
      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));   
      brw_set_predicate_control_flag_value(p, 0xff);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_AND(p, r0uw, brw_flag_reg(), r0uw);
      brw_pop_insn_state(p);
   }
}
示例#20
0
/**
 * Emit code that kills pixels whose X and Y coordinates are outside the
 * boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
 * dst_x1, dst_y1).
 */
void
brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg &x,
                                                const struct brw_reg &y,
                                                const struct brw_reg &dst_x0,
                                                const struct brw_reg &dst_x1,
                                                const struct brw_reg &dst_y0,
                                                const struct brw_reg &dst_y1)
{
   struct brw_reg f0 = brw_flag_reg(0, 0);
   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);

   emit_cmp(BRW_CONDITIONAL_GE, x, dst_x0);
   emit_cmp(BRW_CONDITIONAL_GE, y, dst_y0)->predicate = BRW_PREDICATE_NORMAL;
   emit_cmp(BRW_CONDITIONAL_L, x, dst_x1)->predicate = BRW_PREDICATE_NORMAL;
   emit_cmp(BRW_CONDITIONAL_L, y, dst_y1)->predicate = BRW_PREDICATE_NORMAL;

   fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, 16, g1, f0, g1);
   inst->force_writemask_all = true;
   insts.push_tail(inst);
}
示例#21
0
/**
 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 */
void brw_dp_WRITE_16( struct brw_compile *p,
		      struct brw_reg src,
		      GLuint scratch_offset )
{
   GLuint msg_reg_nr = 1;
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
	      brw_imm_d(scratch_offset));

      brw_pop_insn_state(p);
   }

   {
      GLuint msg_length = 3;
      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE; 
      insn->header.destreg__conditionalmod = msg_reg_nr;
  
      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);

      brw_set_dp_write_message(p->brw,
			       insn,
			       255, /* binding table index (255=stateless) */
			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
			       msg_length,
			       0, /* pixel scoreboard */
			       0, /* response_length */
			       0); /* eot */
   }
}
示例#22
0
static void wm_src_sample_argb(struct brw_compile *p)
{
	static const uint32_t fragment[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
	};
	int n;

	brw_push_insn_state(p);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_MOV(p,
		retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0));
	brw_pop_insn_state(p);

	brw_SAMPLE(p,
		   retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW),
		   1,
		   retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD),
		   1, 0,
		   WRITEMASK_XYZW,
		   GEN5_SAMPLER_MESSAGE_SAMPLE,
		   8,
		   5,
		   true,
		   BRW_SAMPLER_SIMD_MODE_SIMD16);


	for (n = 0; n < p->nr_insn; n++) {
		brw_disasm(stdout, &p->store[n], 60);
	}

	printf("\n\n");
	for (n = 0; n < ARRAY_SIZE(fragment); n++) {
		brw_disasm(stdout,
			   (const struct brw_instruction *)&fragment[n][0],
			   60);
	}
}
示例#23
0
/**
 * Computes the screen-space x,y distance of the pixels from the start
 * vertex.
 *
 * This will be used in linterp or pinterp with the start vertex value
 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
 * to produce interpolated attribute values.
 */
void emit_delta_xy(struct brw_compile *p,
		   const struct brw_reg *dst,
		   GLuint mask,
		   const struct brw_reg *arg0)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg r1 = brw_vec1_grf(1, 0);

   if (mask == 0)
      return;

   assert(mask == WRITEMASK_XY);

   if (intel->gen >= 6) {
       /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1.
	  Just add them with 0.0 for dst reg.. */
       r1 = brw_imm_v(0x00000000);
       brw_ADD(p,
	       dst[0],
	       retype(arg0[0], BRW_REGISTER_TYPE_UW),
	       r1);
       brw_ADD(p,
	       dst[1],
	       retype(arg0[1], BRW_REGISTER_TYPE_UW),
	       r1);
       return;
   }

   /* Calc delta X,Y by subtracting origin in r1 from the pixel
    * centers produced by emit_pixel_xy().
    */
   brw_ADD(p,
	   dst[0],
	   retype(arg0[0], BRW_REGISTER_TYPE_UW),
	   negate(r1));
   brw_ADD(p,
	   dst[1],
	   retype(arg0[1], BRW_REGISTER_TYPE_UW),
	   negate(suboffset(r1,1)));
}
示例#24
0
/**
 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 */
void brw_dp_READ_16( struct brw_compile *p,
		      struct brw_reg dest,
		      GLuint scratch_offset )
{
   GLuint msg_reg_nr = 1;
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
	      brw_imm_d(scratch_offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE; 
      insn->header.destreg__conditionalmod = msg_reg_nr;
  
      brw_set_dest(insn, dest);	/* UW? */
      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));

      brw_set_dp_read_message(p->brw,
			      insn,
			      255, /* binding table index (255=stateless) */
			      3,  /* msg_control (3 means 4 Owords) */
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      1, /* target cache (render/scratch) */
			      1, /* msg_length */
			      2, /* response_length */
			      0); /* eot */
   }
}
示例#25
0
/**
 * Computes the screen-space x,y position of the pixels.
 *
 * This will be used by emit_delta_xy() or emit_wpos_xy() for
 * interpolation of attributes..
 *
 * Payload R0:
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 * R0.1..8 -- ?
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 * R1.6 -- ?
 * R1.7 -- ?
 * R1.8 -- ?
 */
void emit_pixel_xy(struct brw_wm_compile *c,
		   const struct brw_reg *dst,
		   GLuint mask)
{
   struct brw_compile *p = &c->func;
   struct brw_reg r1 = brw_vec1_grf(1, 0);
   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
   struct brw_reg dst0_uw, dst1_uw;

   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (c->dispatch_width == 16) {
      dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
      dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
   } else {
      dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
      dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
   }

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      dst0_uw,
	      stride(suboffset(r1_uw, 4), 2, 4, 0),
	      brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      dst1_uw,
	      stride(suboffset(r1_uw,5), 2, 4, 0),
	      brw_imm_v(0x11001100));
   }
   brw_pop_insn_state(p);
}
示例#26
0
void
gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
{
   struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);

   /* Copy g0. */
   if (vs)
      MOV_RAW(header, brw_vec8_grf(0, 0));

   gen8_instruction *inst;
   if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_OWORD message header */
      default_state.access_mode = BRW_ALIGN_1;
      MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
              brw_imm_ud(0xff00));
      default_state.access_mode = BRW_ALIGN_16;
   }

   inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset,
                        true);
   gen8_set_dst(brw, inst, brw_null_reg());
   gen8_set_src0(brw, inst, header);
}
示例#27
0
static void alloc_regs( struct brw_sf_compile *c )
{
   GLuint reg, i;

   /* Values computed by fixed function unit:
    */
   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
   c->det = brw_vec1_grf(1, 2);
   c->dx0 = brw_vec1_grf(1, 3);
   c->dx2 = brw_vec1_grf(1, 4);
   c->dy0 = brw_vec1_grf(1, 5);
   c->dy2 = brw_vec1_grf(1, 6);

   /* z and 1/w passed in seperately:
    */
   c->z[0]     = brw_vec1_grf(2, 0);
   c->inv_w[0] = brw_vec1_grf(2, 1);
   c->z[1]     = brw_vec1_grf(2, 2);
   c->inv_w[1] = brw_vec1_grf(2, 3);
   c->z[2]     = brw_vec1_grf(2, 4);
   c->inv_w[2] = brw_vec1_grf(2, 5);

   /* The vertices:
    */
   reg = 3;
   for (i = 0; i < c->nr_verts; i++) {
      c->vert[i] = brw_vec8_grf(reg, 0);
      reg += c->nr_attr_regs;
   }

   /* Temporaries, allocated after last vertex reg.
    */
   c->inv_det = brw_vec1_grf(reg, 0);  reg++;
   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
   c->tmp = brw_vec8_grf(reg, 0);  reg++;

   /* Note grf allocation:
    */
   c->prog_data.total_grf = reg;


   /* Outputs of this program - interpolation coefficients for
    * rasterization:
    */
   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
}
示例#28
0
static void brw_wm_projective_st(struct brw_compile *p, int dw,
				 int channel, int msg)
{
	int uv;

	if (dw == 16) {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
		uv = p->gen >= 060 ? 6 : 3;
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		uv = p->gen >= 060 ? 4 : 3;
	}
	uv += 2*channel;

	msg++;
	if (p->gen >= 060) {
		/* First compute 1/z */
		brw_PLN(p,
			brw_vec8_grf(30, 0),
			brw_vec1_grf(uv+1, 0),
			brw_vec8_grf(2, 0));

		if (dw == 16) {
			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
			brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
			brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
		} else
			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));

		brw_PLN(p,
			brw_vec8_grf(26, 0),
			brw_vec1_grf(uv, 0),
			brw_vec8_grf(2, 0));
		brw_PLN(p,
			brw_vec8_grf(28, 0),
			brw_vec1_grf(uv, 4),
			brw_vec8_grf(2, 0));

		brw_MUL(p,
			brw_message_reg(msg),
			brw_vec8_grf(26, 0),
			brw_vec8_grf(30, 0));
		brw_MUL(p,
			brw_message_reg(msg + dw/8),
			brw_vec8_grf(28, 0),
			brw_vec8_grf(30, 0));
	} else {
		struct brw_reg r = brw_vec1_grf(uv, 0);

		/* First compute 1/z */
		brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));

		if (dw == 16) {
			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
			brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
			brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
		} else
			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));

		/* Now compute the output s,t values */
		brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
		brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
		msg += dw/8;

		brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
		brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
		brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
	}
}
示例#29
0
static void brw_wm_write__opacity(struct brw_compile *p, int dw,
				  int src, int mask)
{
	int n;

	if (dw == 8 && p->gen >= 060) {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);

		brw_MUL(p,
			brw_message_reg(2),
			brw_vec8_grf(src+0, 0),
			brw_vec1_grf(mask, 3));
		brw_MUL(p,
			brw_message_reg(3),
			brw_vec8_grf(src+1, 0),
			brw_vec1_grf(mask, 3));
		brw_MUL(p,
			brw_message_reg(4),
			brw_vec8_grf(src+2, 0),
			brw_vec1_grf(mask, 3));
		brw_MUL(p,
			brw_message_reg(5),
			brw_vec8_grf(src+3, 0),
			brw_vec1_grf(mask, 3));

		goto done;
	}

	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

	for (n = 0; n < 4; n++) {
		if (p->gen >= 060) {
			brw_MUL(p,
				brw_message_reg(2 + 2*n),
				brw_vec8_grf(src + 2*n, 0),
				brw_vec1_grf(mask, 3));
		} else if (p->gen >= 045 && dw == 16) {
			brw_MUL(p,
				brw_message_reg(2 + n + BRW_MRF_COMPR4),
				brw_vec8_grf(src + 2*n, 0),
				brw_vec1_grf(mask, 3));
		} else {
			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
			brw_MUL(p,
				brw_message_reg(2 + n),
				brw_vec8_grf(src + 2*n, 0),
				brw_vec1_grf(mask, 3));

			if (dw == 16) {
				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				brw_MUL(p,
					brw_message_reg(2 + n + 4),
					brw_vec8_grf(src + 2*n+1, 0),
					brw_vec1_grf(mask, 3));
			}
		}
	}

done:
	brw_fb_write(p, dw);
}
示例#30
0
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
			      GLuint nr_verts )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;


   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->key.nr_attrs & 1) {
      for (j = 0; j < 3; j++) {
	 GLuint delta = c->key.nr_attrs*16 + 32;

         if (c->chipset.is_igdng)
             delta = c->key.nr_attrs * 16 + 32 * 3;

	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (c->need_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}