Beispiel #1
0
void
vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
        struct brw_reg src0,
        struct brw_reg src1)
{
    /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
     * Header: M0.3):
     *
     *     Slot 0 Offset. This field, after adding to the Global Offset field
     *     in the message descriptor, specifies the offset (in 256-bit units)
     *     from the start of the URB entry, as referenced by URB Handle 0, at
     *     which the data will be accessed.
     *
     * Similar text describes DWORD M0.4, which is slot 1 offset.
     *
     * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
     * of the register for geometry shader invocations 0 and 1) by the
     * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
     *
     * We can do this with the following EU instruction:
     *
     *     mul(2) dst.3<1>UD src0<8;2,4>UD src1   { Align1 WE_all }
     */
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
            src1);
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
Beispiel #2
0
/* Emit the flat-shading colour copy for a two-vertex primitive.
 *
 * c->pv is scaled in place and then used as the distance of a computed jump
 * (JMPI on the ip register).  The jump is followed by two copy_colors()
 * sequences separated by a second, fixed-distance jump.  Nothing is emitted
 * when no VERT_RESULT_COLOR_BITS attribute is set in c->key.attrs, or when
 * the primitive is SF_UNFILLED_TRIS.
 */
static void do_flatshade_line( struct brw_sf_compile *c )
{
    struct brw_compile *p = &c->func;
    struct intel_context *intel = &p->brw->intel;
    struct brw_reg ip = brw_ip_reg();
    GLuint nr_colors = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
    GLuint jmpi;

    /* No colours to copy, or the copy was already done in the clip program. */
    if (nr_colors == 0 || c->key.primitive == SF_UNFILLED_TRIS)
        return;

    /* Jump distances are doubled on gen 5. */
    jmpi = (intel->gen == 5) ? 2 : 1;

    brw_push_insn_state(p);

    /* Turn pv into a jump distance and branch on it. */
    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi * (nr_colors + 1)));
    brw_JMPI(p, ip, ip, c->pv);
    copy_colors(c, c->vert[1], c->vert[0]);

    /* Fixed-distance jump sitting between the two copy sequences. */
    brw_JMPI(p, ip, ip, brw_imm_ud(jmpi * nr_colors));
    copy_colors(c, c->vert[0], c->vert[1]);

    brw_pop_insn_state(p);
}
static void emit_min(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg src0, src1, dst;
    int i;
    brw_push_insn_state(p);
    for (i = 0; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_MOV(p, dst, src0);
	    brw_set_saturate(p, 0);

	    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
	    brw_MOV(p, dst, src1);
	    brw_set_saturate(p, 0);
	    brw_set_predicate_control_flag_value(p, 0xff);
	}
    }
    brw_pop_insn_state(p);
}
Beispiel #4
0
/* Kill pixel - set execution mask to zero for those pixels which
 * fail.
 */
static void emit_kil( struct brw_wm_compile *c,
		      struct brw_reg *arg0)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg pixelmask;
   GLuint i, j;

   if (intel->gen >= 6)
      pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
   else
      pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

   for (i = 0; i < 4; i++) {
      /* Check if we've already done the comparison for this reg
       * -- common when someone does KIL TEMP.wwww.
       */
      for (j = 0; j < i; j++) {
	 if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0)
	    break;
      }
      if (j != i)
	 continue;

      brw_push_insn_state(p);
      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));   
      brw_set_predicate_control_flag_value(p, 0xff);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_AND(p, pixelmask, brw_flag_reg(), pixelmask);
      brw_pop_insn_state(p);
   }
}
/* Copy g1 into message register base_reg + 1 and then emit the framebuffer
 * write via brw_fb_WRITE().
 *
 * base_reg: first message register of the payload.
 * nr:       forwarded to brw_fb_WRITE() after `target`.
 * target:   forwarded to brw_fb_WRITE().
 * eot:      forwarded to brw_fb_WRITE() as its last argument.
 */
static void fire_fb_write( struct brw_wm_compile *c,
			   GLuint base_reg,
			   GLuint nr,
			   GLuint target,
			   GLuint eot )
{
   struct brw_compile *p = &c->func;
   struct brw_reg control_mrf = brw_message_reg(base_reg + 1);
   struct brw_reg g1 = brw_vec8_grf(1, 0);
   struct brw_reg null_uw16 = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   struct brw_reg g0_uw = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

   /* Pass through control information:
    *
    *  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, control_mrf, g1);
   brw_pop_insn_state(p);

   /* Send framebuffer write message:
    *
    *  send (16) null.0<1>:uw m0   r0.0<8;8,1>:uw   0x85a04000:ud   { Align1 EOT }
    */
   brw_fb_WRITE(p, null_uw16, base_reg, g0_uw, target, nr, 0, eot);
}
/* Flat-shade a triangle by copying the provoking vertex's colours to the
 * other two vertices.
 *
 * A computed jump is needed because the vertices are ordered according to
 * y-coordinate before reaching this point, so the PV could be anywhere.
 * c->pv is scaled in place into the jump distance.  Nothing is emitted when
 * no VERT_RESULT_COLOR_BITS attribute is present or when the primitive is
 * SF_UNFILLED_TRIS.
 */
static void do_flatshade_triangle( struct brw_sf_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg ip = brw_ip_reg();
   GLuint nr_colors = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
   GLuint two_copies;

   /* No colours to copy, or the copy was already done in the clip program. */
   if (nr_colors == 0 || c->key.primitive == SF_UNFILLED_TRIS)
      return;

   /* Each case below issues two copy_colors() calls; jump distances are
    * expressed in terms of twice the colour count.
    */
   two_copies = nr_colors * 2;

   brw_push_insn_state(p);

   brw_MUL(p, c->pv, c->pv, brw_imm_ud(two_copies + 1));
   brw_JMPI(p, ip, ip, c->pv);

   /* Source is vert[0]. */
   copy_colors(c, c->vert[1], c->vert[0]);
   copy_colors(c, c->vert[2], c->vert[0]);
   brw_JMPI(p, ip, ip, brw_imm_ud(two_copies * 2 + 1));

   /* Source is vert[1]. */
   copy_colors(c, c->vert[0], c->vert[1]);
   copy_colors(c, c->vert[2], c->vert[1]);
   brw_JMPI(p, ip, ip, brw_imm_ud(two_copies));

   /* Source is vert[2]. */
   copy_colors(c, c->vert[0], c->vert[2]);
   copy_colors(c, c->vert[1], c->vert[2]);

   brw_pop_insn_state(p);
}
Beispiel #7
0
/* Emit a one-operand extended math operation.
 *
 * function: math function code handed to brw_math().
 * dst:      destination registers, indexed by channel.
 * mask:     write mask; exactly one XYZW bit is expected (asserted).  The
 *           SATURATE bit selects saturation.
 * arg0:     source operand (only arg0[0] is used).
 *
 * One brw_math() is always emitted; a second one for the second half is
 * added when c->dispatch_width is 16.
 */
void emit_math1(struct brw_wm_compile *c,
		GLuint function,
		const struct brw_reg *dst,
		GLuint mask,
		const struct brw_reg *arg0)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   const GLuint chan_bits = mask & WRITEMASK_XYZW;
   struct brw_reg src = arg0[0];
   GLuint saturate;
   int dst_chan;

   if (chan_bits == 0)
      return; /* Do not emit dead code */

   assert(is_power_of_two(chan_bits));

   dst_chan = _mesa_ffs(chan_bits) - 1;

   if (mask & SATURATE)
      saturate = BRW_MATH_SATURATE_SATURATE;
   else
      saturate = BRW_MATH_SATURATE_NONE;

   if (intel->gen >= 6 &&
       (src.hstride == BRW_HORIZONTAL_STRIDE_0 ||
	src.file != BRW_GENERAL_REGISTER_FILE ||
	src.negate ||
	src.abs)) {
      /* Gen6 math requires that source and dst horizontal stride be 1,
       * and that the argument be in the GRF.
       *
       * The hardware ignores source modifiers (negate and abs) on math
       * instructions, so the operand is first moved into the destination,
       * which then doubles as the math source.
       */
      brw_MOV(p, dst[dst_chan], src);
      src = dst[dst_chan];
   }

   /* Send two messages to perform all 16 operations:
    */
   brw_push_insn_state(p);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst[dst_chan], function, saturate, 2, src,
	    BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, offset(dst[dst_chan], 1), function, saturate, 3,
	       sechalf(src),
	       BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
   }

   brw_pop_insn_state(p);
}
Beispiel #8
0
void
vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
        struct brw_reg src)
{
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);

    /* If we think of the src and dst registers as composed of 8 DWORDs each,
     * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
     * them to WORDs, and then pack them into DWORD 2 of dst.
     *
     * It's easier to get the EU to do this if we think of the src and dst
     * registers as composed of 16 WORDS each; then, we want to pick up the
     * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 of
     * dst.
     *
     * We can do that by the following EU instruction:
     *
     *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
     */
    brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
            stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
Beispiel #9
0
void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
    /* Two-operand math: one operand is copied into message register
     * base_mrf + 1, the other is handed to brw_math() directly.
     *
     * Every opcode other than SHADER_OPCODE_POW is treated as an integer
     * divide, for which the operands are swapped.  From the Ironlake PRM,
     * Volume 4, Part 1, Section 6.1.13 "Message Payload":
     *
     * "Operand0[7].  For the INT DIV functions, this operand is the
     *  denominator."
     *  ...
     * "Operand1[7].  For the INT DIV functions, this operand is the
     *  numerator."
     */
    const bool is_pow = inst->opcode == SHADER_OPCODE_POW;
    const struct brw_reg first_operand = is_pow ? src0 : src1;
    const struct brw_reg second_operand = is_pow ? src1 : src0;
    const struct brw_reg second_operand_mrf =
        retype(brw_message_reg(inst->base_mrf + 1), second_operand.type);

    /* The payload copy runs without saturation or predication. */
    brw_push_insn_state(p);
    brw_set_saturate(p, false);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_MOV(p, second_operand_mrf, second_operand);
    brw_pop_insn_state(p);

    brw_math(p,
             dst,
             brw_math_function(inst->opcode),
             inst->base_mrf,
             first_operand,
             BRW_MATH_DATA_VECTOR,
             BRW_MATH_PRECISION_FULL);
}
static void emit_kil(struct brw_wm_compile *c)
{
	struct brw_compile *p = &c->func;
	struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
	brw_push_insn_state(p);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
	brw_AND(p, depth, c->emit_mask_reg, depth);
	brw_pop_insn_state(p);
}
static void
gen_f0_0_MOV_GRF_GRF(struct brw_codegen *p)
{
   struct brw_reg g0 = brw_vec8_grf(0, 0);
   struct brw_reg g2 = brw_vec8_grf(2, 0);

   brw_push_insn_state(p);
   brw_set_default_predicate_control(p, true);
   brw_MOV(p, g0, g2);
   brw_pop_insn_state(p);
}
Beispiel #12
0
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7.  Explicitly test
 * it, so that we run the fuzzing can run over all the other bits that might
 * interact with it.
 */
static void
gen_f0_1_MOV_GRF_GRF(struct brw_compile *p)
{
   struct brw_reg g0 = brw_vec8_grf(0, 0);
   struct brw_reg g2 = brw_vec8_grf(2, 0);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, true);
   struct brw_instruction *mov = brw_MOV(p, g0, g2);
   mov->bits2.da1.flag_subreg_nr = 1;
   brw_pop_insn_state(p);
}
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7.  Explicitly test
 * it, so that we run the fuzzing can run over all the other bits that might
 * interact with it.
 */
static void
gen_f0_1_MOV_GRF_GRF(struct brw_codegen *p)
{
   struct brw_reg g0 = brw_vec8_grf(0, 0);
   struct brw_reg g2 = brw_vec8_grf(2, 0);

   brw_push_insn_state(p);
   brw_set_default_predicate_control(p, true);
   brw_inst *mov = brw_MOV(p, g0, g2);
   brw_inst_set_flag_subreg_nr(p->devinfo, mov, 1);
   brw_pop_insn_state(p);
}
Beispiel #14
0
void
vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
        struct brw_reg src)
{
    assert(src.file == BRW_IMMEDIATE_VALUE);

    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_MOV(p, suboffset(vec1(dst), 2), src);
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
Beispiel #15
0
/**
 * Extended math function, float[16].
 * Use 2 send instructions: the first targets dest and message register
 * msg_reg_nr, the second targets offset(dest, 1) and msg_reg_nr + 1 and is
 * flagged as the second compressed half.  Both use the same src operand.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   /* POW takes a two-register message; SINCOS returns two registers. */
   const GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   const GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
   GLuint half;

   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   for (half = 0; half < 2; half++) {
      struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);

      /* Only the second instruction overrides the compression control. */
      if (half == 1)
	 send->header.compression_control = BRW_COMPRESSION_2NDHALF;
      send->header.destreg__conditionalmod = msg_reg_nr + half;

      brw_set_dest(send, (half == 1) ? offset(dest, 1) : dest);
      brw_set_src0(send, src);
      brw_set_math_message(p->brw,
			   send,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   BRW_MATH_DATA_VECTOR);
   }

   brw_pop_insn_state(p);
}
Beispiel #16
0
/* Copy one register derived from arg1 into message register `reg`.
 *
 * c->aa_dest_stencil_reg selects the source: its value divided by two picks
 * the arg1[] entry, and the remainder is applied as a register offset.
 */
static void emit_aa( struct brw_wm_compile *c,
		     struct brw_reg *arg1,
		     GLuint reg )
{
   struct brw_compile *p = &c->func;
   const GLuint index = c->aa_dest_stencil_reg / 2;
   const GLuint reg_offset = c->aa_dest_stencil_reg % 2;
   struct brw_reg src = offset(arg1[index], reg_offset);
   struct brw_reg dst_mrf = brw_message_reg(reg);

   brw_push_insn_state(p);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
   brw_MOV(p, dst_mrf, src);

   brw_pop_insn_state(p);
}
Beispiel #17
0
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 * relAddr is meant to select an indirect fetch through the address
 * register, but it is currently ignored (see the XXX below).
 */
void brw_dp_READ_4( struct brw_compile *p,
                    struct brw_reg dest,
                    GLboolean relAddr,
                    GLuint location,
                    GLuint bind_table_index )
{
   /* XXX: relAddr not implemented */
   const GLuint msg_reg_nr = 1;
   struct brw_reg location_mrf;
   struct brw_instruction *send;

   /* Step 1: set up MRF[1] with location/offset into const buffer. */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   location_mrf = retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD);

   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
    */
   brw_MOV(p, location_mrf, brw_imm_ud(location));
   brw_pop_insn_state(p);

   /* Step 2: the SEND that performs the read. */
   send = next_insn(p, BRW_OPCODE_SEND);

   send->header.predicate_control = BRW_PREDICATE_NONE;
   send->header.compression_control = BRW_COMPRESSION_NONE;
   send->header.destreg__conditionalmod = msg_reg_nr;
   send->header.mask_control = BRW_MASK_DISABLE;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(send, dest);
   brw_set_src0(send, brw_null_reg());

   brw_set_dp_read_message(p->brw,
			   send,
			   bind_table_index,
			   0,  /* msg_control (0 means 1 Oword) */
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1, /* response_length (1 Oword) */
			   0); /* eot */
}
Beispiel #18
0
static void copy_z_inv_w( struct brw_sf_compile *c )
{
    struct brw_compile *p = &c->func;
    GLuint i;

    brw_push_insn_state(p);

    /* Copy both scalars with a single MOV:
     */
    for (i = 0; i < c->nr_verts; i++)
        brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));

    brw_pop_insn_state(p);
}
/* Emit a two-operand extended math operation for 16 channels.
 *
 * The operand halves are copied into message registers 2..5 (arg0 into m2
 * and m4, arg1 into m3 and m5), then two brw_math() messages are sent: one
 * starting at m2 writing dst[0], one starting at m4 writing offset(dst[0], 1).
 * Only the X channel may be written (asserted); the SATURATE bit in mask
 * selects saturation.
 */
static void emit_math2( struct brw_compile *p, 
			GLuint function,
			const struct brw_reg *dst,
			GLuint mask,
			const struct brw_reg *arg0,
			const struct brw_reg *arg1)
{
   const GLuint chan_bits = mask & WRITEMASK_XYZW;
   GLuint saturate;

   if (chan_bits == 0)
      return; /* Do not emit dead code*/

   assert(chan_bits == WRITEMASK_X);

   if (mask & SATURATE)
      saturate = BRW_MATH_SATURATE_SATURATE;
   else
      saturate = BRW_MATH_SATURATE_NONE;

   brw_push_insn_state(p);

   /* First operand: first half to m2, second half to m4. */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, brw_message_reg(2), arg0[0]);
   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));

   /* Second operand: first half to m3, second half to m5. */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, brw_message_reg(3), arg1[0]);
   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));

   /* Send two messages to perform all 16 operations:
    */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst[0], function, saturate, 2, brw_null_reg(),
	    BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);

   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_math(p, offset(dst[0], 1), function, saturate, 4, brw_null_reg(),
	    BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);

   brw_pop_insn_state(p);
}
Beispiel #20
0
static void brw_fb_write(struct brw_compile *p, int dw)
{
	struct brw_instruction *insn;
	unsigned msg_control, msg_type, msg_len;
	struct brw_reg src0;
	bool header;

	if (dw == 16) {
		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
		msg_len = 8;
	} else {
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
		msg_len = 4;
	}

	if (p->gen < 060) {
		brw_push_insn_state(p);
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		brw_set_mask_control(p, BRW_MASK_DISABLE);
		brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
		brw_pop_insn_state(p);

		msg_len += 2;
	}

	/* The execution mask is ignored for render target writes. */
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
	insn->header.predicate_control = 0;
	insn->header.compression_control = BRW_COMPRESSION_NONE;

	if (p->gen >= 060) {
		msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
		src0 = brw_message_reg(2);
		header = false;
	} else {
		insn->header.destreg__conditionalmod = 0;
		msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
		src0 = __retype_uw(brw_vec8_grf(0, 0));
		header = true;
	}

	brw_set_dest(p, insn, null_result(dw));
	brw_set_src0(p, insn, src0);
	brw_set_dp_write_message(p, insn, 0,
				 msg_control, msg_type, msg_len,
				 header, true, 0, true, false);
}
Beispiel #21
0
/* Select predication for subsequent instructions based on a flag value.
 *
 * value == 0xff turns predication off.  Any other value turns normal
 * predication on; if it differs from the cached p->flag_value, a MOV that
 * loads it into the flag register is emitted first and the cache is updated.
 *
 * Open question carried over from the original author: how does predicate
 * control work when execution_size != 8?  Do I need to test/set for 0xffff
 * when execution_size is 16?
 */
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
{
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   if (value == 0xff)
      return;

   if (p->flag_value != value) {
      /* Emitted under a pushed copy of the state, i.e. with the predicate
       * control that was just reset above.
       */
      brw_push_insn_state(p);
      brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
      p->flag_value = value;
      brw_pop_insn_state(p);
   }

   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
Beispiel #22
0
void
vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
{
    /* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT
     * and store into dst.0 & dst.4. So generate the instruction:
     *
     *     shr(8) dst<1> R0<1,4,0> GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT { align1 WE_normal 1Q }
     */
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    dst = retype(dst, BRW_REGISTER_TYPE_UD);
    struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
    brw_SHR(p, dst, stride(r0, 1, 4, 0),
            brw_imm_ud(GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT));
    brw_pop_insn_state(p);
}
Beispiel #23
0
void
vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
{
    /* We want to left shift just DWORD 4 (the x component belonging to the
     * second geometry shader invocation) by 4 bits.  So generate the
     * instruction:
     *
     *     shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
     */
    dst = suboffset(vec1(dst), 4);
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_SHL(p, dst, dst, brw_imm_ud(4));
    brw_pop_insn_state(p);
}
Beispiel #24
0
void
vec4_generator::generate_unpack_flags(vec4_instruction *inst,
                                      struct brw_reg dst)
{
    /* Split the low byte of flag register f0.0 into two nibbles: bits 0-3
     * go to element 0 of dst, bits 4-7 (shifted down by 4) go to element 4.
     * `inst` is not consulted.
     */
    const struct brw_reg flags = brw_flag_reg(0, 0);
    const struct brw_reg low_nibble_dst = suboffset(vec1(dst), 0);
    const struct brw_reg high_nibble_dst = suboffset(vec1(dst), 4);

    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_access_mode(p, BRW_ALIGN_1);

    brw_AND(p, low_nibble_dst, flags, brw_imm_ud(0x0f));

    brw_AND(p, high_nibble_dst, flags, brw_imm_ud(0xf0));
    brw_SHR(p, high_nibble_dst, high_nibble_dst, brw_imm_ud(4));

    brw_pop_insn_state(p);
}
Beispiel #25
0
/* Emit the framebuffer write via brw_fb_WRITE().
 *
 * Before gen 6, g1 is first copied into message register base_reg + 1.  The
 * render-target message control is chosen from c->dispatch_width (SIMD16
 * single source for 16, SIMD8 single source otherwise).
 *
 * base_reg: first message register of the payload.
 * nr:       forwarded to brw_fb_WRITE() after `target`.
 * target:   forwarded to brw_fb_WRITE().
 * eot:      forwarded to brw_fb_WRITE().
 */
static void fire_fb_write( struct brw_wm_compile *c,
			   GLuint base_reg,
			   GLuint nr,
			   GLuint target,
			   GLuint eot )
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg g0_uw = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
   uint32_t msg_control;

   /* Pass through control information:
    *
    *  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
    *
    * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case.
    */
   if (intel->gen < 6) {
      struct brw_reg control_mrf = brw_message_reg(base_reg + 1);
      struct brw_reg g1 = brw_vec8_grf(1, 0);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, control_mrf, g1);
      brw_pop_insn_state(p);
   }

   msg_control = (c->dispatch_width == 16) ?
      BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE :
      BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   /* Send framebuffer write message:
    *
    *  send (16) null.0<1>:uw m0   r0.0<8;8,1>:uw   0x85a04000:ud   { Align1 EOT }
    */
   brw_fb_WRITE(p,
		c->dispatch_width,
		base_reg,
		g0_uw,
		msg_control,
		target,
		nr,
		0,
		eot,
		true);
}
/* Kill pixel - set execution mask to zero for those pixels which
 * fail.
 */
static void emit_kil( struct brw_wm_compile *c,
		      struct brw_reg *arg0)
{
   struct brw_compile *p = &c->func;
   struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   GLuint i;
   

   /* XXX - usually won't need 4 compares!
    */
   for (i = 0; i < 4; i++) {
      brw_push_insn_state(p);
      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));   
      brw_set_predicate_control_flag_value(p, 0xff);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_AND(p, r0uw, brw_flag_reg(), r0uw);
      brw_pop_insn_state(p);
   }
}
static void emit_fb_write(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    int nr = 2;
    int channel;
    GLuint target, eot;
    struct brw_reg src0;

    /* Reserve a space for AA - may not be needed:
     */
    if (c->key.aa_dest_stencil_reg)
	nr += 1;
    {
	brw_push_insn_state(p);
	for (channel = 0; channel < 4; channel++) {
	    src0 = get_src_reg(c,  &inst->SrcReg[0], channel, 1);
	    /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
	    /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
	    brw_MOV(p, brw_message_reg(nr + channel), src0);
	}
	/* skip over the regs populated above: */
	nr += 8;
	brw_pop_insn_state(p);
    }

   if (c->key.source_depth_to_render_target)
   {
      if (c->key.computes_depth) {
         src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1);
         brw_MOV(p, brw_message_reg(nr), src0);
      } else {
         src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
         brw_MOV(p, brw_message_reg(nr), src0);
      }

      nr += 2;
   }
    target = inst->Sampler >> 1;
    eot = inst->Sampler & 1;
    fire_fb_write(c, 0, nr, target, eot);
}
Beispiel #28
0
/**
 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 *
 * Emits a MOV that stores scratch_offset in element 2 of g0, followed by an
 * OWORD block write SEND that uses message register 1 and `src` as source.
 */
void brw_dp_WRITE_16( struct brw_compile *p,
		      struct brw_reg src,
		      GLuint scratch_offset )
{
   const GLuint msg_reg_nr = 1;
   const GLuint msg_length = 3;
   struct brw_reg global_offset = retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D);
   struct brw_reg null_uw = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
   struct brw_instruction *send;

   /* set message header global offset field (reg 0, element 2) */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, global_offset, brw_imm_d(scratch_offset));
   brw_pop_insn_state(p);

   send = next_insn(p, BRW_OPCODE_SEND);

   send->header.predicate_control = 0; /* XXX */
   send->header.compression_control = BRW_COMPRESSION_NONE;
   send->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(send, null_uw);
   brw_set_src0(send, src);

   brw_set_dp_write_message(p->brw,
			    send,
			    255, /* binding table index (255=stateless) */
			    BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
			    BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
			    msg_length,
			    0, /* pixel scoreboard */
			    0, /* response_length */
			    0); /* eot */
}
Beispiel #29
0
/* Assemble a short sampling sequence into `p` (a MOV that zeroes element 2
 * of g0, then a SIMD16 brw_SAMPLE), disassemble everything stored in `p` to
 * stdout, print a blank separator, and then disassemble the reference
 * instruction words pulled in from the .g6b files.
 */
static void wm_src_sample_argb(struct brw_compile *p)
{
	/* Reference instructions, four 32-bit words each. */
	static const uint32_t fragment[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
	};
	struct brw_reg g0_dword2 = retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD);
	struct brw_reg sample_dst = retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW);
	struct brw_reg sample_src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
	int i;

	brw_push_insn_state(p);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_MOV(p, g0_dword2, brw_imm_ud(0));
	brw_pop_insn_state(p);

	brw_SAMPLE(p,
		   sample_dst,
		   1,
		   sample_src,
		   1, 0,
		   WRITEMASK_XYZW,
		   GEN5_SAMPLER_MESSAGE_SAMPLE,
		   8,
		   5,
		   true,
		   BRW_SAMPLER_SIMD_MODE_SIMD16);

	/* Generated instructions first... */
	i = 0;
	while (i < p->nr_insn) {
		brw_disasm(stdout, &p->store[i], 60);
		i++;
	}

	printf("\n\n");

	/* ...then the reference ones. */
	i = 0;
	while (i < ARRAY_SIZE(fragment)) {
		const struct brw_instruction *ref =
			(const struct brw_instruction *)&fragment[i][0];

		brw_disasm(stdout, ref, 60);
		i++;
	}
}
Beispiel #30
0
/* Emit a "set on condition" for each channel enabled in `mask`.
 *
 * Per channel, a compare of arg0[i] against arg1[i] with condition `cond`
 * is emitted, then dst[i] is unconditionally set to 0.0 and, predicated on
 * the compare result, overwritten with 1.0.
 */
void emit_sop(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      GLuint cond,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1)
{
   GLuint chan;

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1 << chan)))
	 continue;

      brw_push_insn_state(p);

      brw_CMP(p, brw_null_reg(), cond, arg0[chan], arg1[chan]);

      /* Default result: 0.0, written without predication. */
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_MOV(p, dst[chan], brw_imm_f(0));

      /* Where the compare passed: 1.0. */
      brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      brw_MOV(p, dst[chan], brw_imm_f(1.0));

      brw_pop_insn_state(p);
   }
}