Example #1
0
void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
   struct brw_codegen *p = &c->func;
   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
   struct brw_reg primmask;
   int jmp;
   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));

   c->nr_verts = 3;
   alloc_regs(c);

   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);

   brw_MOV(p, primmask, brw_imm_ud(1));
   brw_SHL(p, primmask, primmask, payload_prim);

   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
					       (1<<_3DPRIM_TRISTRIP) |
					       (1<<_3DPRIM_TRIFAN) |
					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
					       (1<<_3DPRIM_POLYGON) |
					       (1<<_3DPRIM_RECTLIST) |
					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_tri_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
					       (1<<_3DPRIM_LINESTRIP) |
					       (1<<_3DPRIM_LINELOOP) |
					       (1<<_3DPRIM_LINESTRIP_CONT) |
					       (1<<_3DPRIM_LINESTRIP_BF) |
					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_line_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_point_sprite_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   brw_emit_point_setup( c, false );
}
Example #2
0
void brw_copy8(struct brw_compile *p,
	       struct brw_reg dst,
	       struct brw_reg src,
	       GLuint count)
{
   GLuint i;

   dst = vec8(dst);
   src = vec8(src);

   for (i = 0; i < count; i++)
   {
      GLuint delta = i*32;
      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
   }
}
static void emit_abs( struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    int i;
    struct brw_compile *p = &c->func;
    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
    for (i = 0; i < 4; i++) {
	if (inst->DstReg.WriteMask & (1<<i)) {
	    struct brw_reg src, dst;
	    dst = get_dst_reg(c, inst, i, 1);
	    src = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    brw_MOV(p, dst, brw_abs(src));
	}
    }
    brw_set_saturate(p, 0);
}
Example #4
0
static void
set_predicate_control_flag_value(struct brw_compile *p,
                                 struct brw_sf_compile *c,
                                 unsigned value)
{
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

   if (value != 0xff) {
      if (value != c->flag_value) {
         brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
         c->flag_value = value;
      }

      brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
   }
}
Example #5
0
static void
set_predicate_control_flag_value(struct brw_compile *p,
                                 struct brw_sf_compile *c,
                                 unsigned value)
{
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   if (value != 0xff) {
      if (value != c->flag_value) {
         brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
         c->flag_value = value;
      }

      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }
}
Example #6
0
void brw_copy8(struct brw_codegen *p,
	       struct brw_reg dst,
	       struct brw_reg src,
	       unsigned count)
{
   unsigned i;

   dst = vec8(dst);
   src = vec8(src);

   for (i = 0; i < count; i++)
   {
      unsigned delta = i*32;
      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
   }
}
Example #7
0
static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
	int n;

	if (dw == 8 && p->gen >= 060) {
		/* XXX pixel execution mask? */
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);

		brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0));
		brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0));
		brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
		brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0));
		goto done;
	}

	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

	for (n = 0; n < 4; n++) {
		if (p->gen >= 060) {
			brw_MOV(p,
				brw_message_reg(2 + 2*n),
				brw_vec8_grf(src + 2*n, 0));
		} else if (p->gen >= 045 && dw == 16) {
			brw_MOV(p,
				brw_message_reg(2 + n + BRW_MRF_COMPR4),
				brw_vec8_grf(src + 2*n, 0));
		} else {
			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
			brw_MOV(p,
				brw_message_reg(2 + n),
				brw_vec8_grf(src + 2*n, 0));

			if (dw == 16) {
				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
				brw_MOV(p,
					brw_message_reg(2 + n + 4),
					brw_vec8_grf(src + 2*n+1, 0));
			}
		}
	}

done:
	brw_fb_write(p, dw);
}
Example #8
0
/**
 * Move a GPR to scratch memory. 
 */
static void emit_spill( struct brw_wm_compile *c,
			struct brw_reg reg,
			GLuint slot )
{
   struct brw_compile *p = &c->func;

   /*
     mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
   */
   brw_MOV(p, brw_message_reg(2), reg);

   /*
     mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
     send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
   */
   brw_oword_block_write_scratch(p, brw_message_reg(1), 2, slot);
}
Example #9
0
static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
{
	struct brw_compile *p = &c->func;
	brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
	brw_ff_sync(p, 
				c->reg.R0,
				0,
				c->reg.R0,
				1,	
				1,		/* used */
				1,  	/* msg length */
				1,		/* response length */
				0,		/* eot */
				1,		/* write compelete */
				0,		/* urb offset */
				BRW_URB_SWIZZLE_NONE);
}
Example #10
0
void
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
        struct brw_reg dst,
        struct brw_reg index,
        struct brw_reg offset)
{
    assert(brw->gen <= 7);
    assert(index.file == BRW_IMMEDIATE_VALUE &&
           index.type == BRW_REGISTER_TYPE_UD);
    uint32_t surf_index = index.dw1.ud;

    struct brw_reg header = brw_vec8_grf(0, 0);

    gen6_resolve_implied_move(p, &header, inst->base_mrf);

    brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
            offset);

    uint32_t msg_type;

    if (brw->gen >= 6)
        msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
    else if (brw->gen == 5 || brw->is_g4x)
        msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
    else
        msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

    /* Each of the 8 channel enables is considered for whether each
     * dword is written.
     */
    struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
    brw_set_dest(p, send, dst);
    brw_set_src0(p, send, header);
    if (brw->gen < 6)
        send->header.destreg__conditionalmod = inst->base_mrf;
    brw_set_dp_read_message(p, send,
                            surf_index,
                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                            msg_type,
                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                            2, /* mlen */
                            true, /* header_present */
                            1 /* rlen */);

    brw_mark_surface_used(&prog_data->base, surf_index);
}
void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
   struct brw_instruction *is_rev;

   /* Initial list of indices for incoming vertexes:
    */
   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
   brw_CMP(p, 
	   vec1(brw_null_reg()), 
	   BRW_CONDITIONAL_EQ, 
	   tmp0,
	   brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

   /* XXX: Is there an easier way to do this?  Need to reverse every
    * second tristrip element:  Can ignore sometimes?
    */
   is_rev = brw_IF(p, BRW_EXECUTE_1);
   {   
      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[1]) );
      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[0]) );
      if (c->need_direction)
	 brw_MOV(p, c->reg.dir, brw_imm_f(-1));
   }
   is_rev = brw_ELSE(p, is_rev);
   {
      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[0]) );
      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[1]) );
      if (c->need_direction)
	 brw_MOV(p, c->reg.dir, brw_imm_f(1));
   }
   brw_ENDIF(p, is_rev);

   brw_MOV(p, get_element(c->reg.inlist, 2),  brw_address(c->reg.vertex[2]) );
   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
   brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
}
static void emit_math1(struct brw_wm_compile *c,
		struct prog_instruction *inst, GLuint func)
{
    struct brw_compile *p = &c->func;
    struct brw_reg src0, dst;

    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
    brw_MOV(p, brw_message_reg(2), src0);
    brw_math(p,
	    dst,
	    func,
	    (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
	    2,
	    brw_null_reg(),
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);
}
Example #13
0
static void fire_fb_write( struct brw_wm_compile *c,
			   GLuint base_reg,
			   GLuint nr,
			   GLuint target,
			   GLuint eot )
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;

   /* Pass through control information:
    * 
    * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case.
    */
/*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
   if (intel->gen < 6)
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, 
	       brw_message_reg(base_reg + 1),
	       brw_vec8_grf(1, 0));
      brw_pop_insn_state(p);
   }

   if (c->dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   /* Send framebuffer write message: */
/*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
   brw_fb_WRITE(p,
		c->dispatch_width,
		base_reg,
		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
		msg_control,
		target,		
		nr,
		0, 
		eot,
		true);
}
Example #14
0
void brw_clip_init_planes( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;

   if (!c->key.nr_userclip) {
      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0,    0, 0xff, 1));
      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0,    0,    1, 1));
      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff,    0, 1));
      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0,    1,    0, 1));
      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff,  0,    0, 1));
      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1,    0,    0, 1));
   }
}
/* Post-fragment-program processing.  Send the results to the
 * framebuffer.
 */
static void emit_spill( struct brw_wm_compile *c,
			struct brw_reg reg,
			GLuint slot )
{
   struct brw_compile *p = &c->func;

   /*
     mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
   */
   brw_MOV(p, brw_message_reg(2), reg);

   /*
     mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
     send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
   */
   brw_dp_WRITE_16(p, 
		   retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
		   1, 
		   slot);
}
Example #16
0
void emit_cinterp(struct brw_compile *p,
		  const struct brw_reg *dst,
		  GLuint mask,
		  const struct brw_reg *arg0)
{
   struct brw_reg interp[4];
   GLuint nr = arg0[0].nr;
   GLuint i;

   interp[0] = brw_vec1_grf(nr, 0);
   interp[1] = brw_vec1_grf(nr, 4);
   interp[2] = brw_vec1_grf(nr+1, 0);
   interp[3] = brw_vec1_grf(nr+1, 4);

   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
         brw_MOV(p, dst[i], suboffset(interp[i],3));	/* TODO: optimize away like other moves */
      }
   }
}
Example #17
0
/**
 * Load a GPR from scratch memory. 
 */
static void emit_unspill( struct brw_wm_compile *c,
			  struct brw_reg reg,
			  GLuint slot )
{
   struct brw_compile *p = &c->func;

   /* Slot 0 is the undef value.
    */
   if (slot == 0) {
      brw_MOV(p, reg, brw_imm_f(0));
      return;
   }

   /*
     mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
     send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
   */

   brw_oword_block_read(p, vec16(reg), brw_message_reg(1), 2, slot);
}
void brw_clip_emit_vue(struct brw_clip_compile *c,
		       struct brw_indirect vert,
                       enum brw_urb_write_flags flags,
		       GLuint header)
{
   struct brw_codegen *p = &c->func;
   bool allocate = flags & BRW_URB_WRITE_ALLOCATE;

   brw_clip_ff_sync(c);

   /* Any URB entry that is allocated must subsequently be used or discarded,
    * so it doesn't make sense to mark EOT and ALLOCATE at the same time.
    */
   assert(!(allocate && (flags & BRW_URB_WRITE_EOT)));

   /* Copy the vertex from vertn into m1..mN+1:
    */
   brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);

   /* Overwrite PrimType and PrimStart in the message header, for
    * each vertex in turn:
    */
   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));


   /* Send each vertex as a separate write to the urb.  This
    * is different to the concept in brw_sf_emit.c, where
    * subsequent writes are used to build up a single urb
    * entry.  Each of these writes instantiates a separate
    * urb entry - (I think... what about 'allocate'?)
    */
   brw_urb_WRITE(p,
		 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
		 0,
		 c->reg.R0,
                 flags,
		 c->nr_regs + 1, /* msg length */
		 allocate ? 1 : 0, /* response_length */
		 0,		/* urb offset */
		 BRW_URB_SWIZZLE_NONE);
}
Example #19
0
void brw_clip_emit_vue(struct brw_clip_compile *c, 
		       struct brw_indirect vert,
		       bool allocate,
		       bool eot,
		       GLuint header)
{
   struct brw_compile *p = &c->func;

   brw_clip_ff_sync(c);

   assert(!(allocate && eot));

   /* Copy the vertex from vertn into m1..mN+1:
    */
   brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);

   /* Overwrite PrimType and PrimStart in the message header, for
    * each vertex in turn:
    */
   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));


   /* Send each vertex as a seperate write to the urb.  This
    * is different to the concept in brw_sf_emit.c, where
    * subsequent writes are used to build up a single urb
    * entry.  Each of these writes instantiates a seperate
    * urb entry - (I think... what about 'allocate'?)
    */
   brw_urb_WRITE(p, 
		 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
		 0,
		 c->reg.R0,
		 allocate,
		 1,		/* used */
		 c->nr_regs + 1, /* msg length */
		 allocate ? 1 : 0, /* response_length */ 
		 eot,		/* eot */
		 1,		/* writes_complete */
		 0,		/* urb offset */
		 BRW_URB_SWIZZLE_NONE);
}
Example #20
0
/**
 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 */
void brw_dp_WRITE_16( struct brw_compile *p,
		      struct brw_reg src,
		      GLuint scratch_offset )
{
   GLuint msg_reg_nr = 1;
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
	      brw_imm_d(scratch_offset));

      brw_pop_insn_state(p);
   }

   {
      GLuint msg_length = 3;
      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE; 
      insn->header.destreg__conditionalmod = msg_reg_nr;
  
      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);

      brw_set_dp_write_message(p->brw,
			       insn,
			       255, /* binding table index (255=stateless) */
			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
			       msg_length,
			       0, /* pixel scoreboard */
			       0, /* response_length */
			       0); /* eot */
   }
}
Example #21
0
static void wm_src_sample_argb(struct brw_compile *p)
{
	static const uint32_t fragment[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
	};
	int n;

	brw_push_insn_state(p);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_MOV(p,
		retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0));
	brw_pop_insn_state(p);

	brw_SAMPLE(p,
		   retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW),
		   1,
		   retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD),
		   1, 0,
		   WRITEMASK_XYZW,
		   GEN5_SAMPLER_MESSAGE_SAMPLE,
		   8,
		   5,
		   true,
		   BRW_SAMPLER_SIMD_MODE_SIMD16);


	for (n = 0; n < p->nr_insn; n++) {
		brw_disasm(stdout, &p->store[n], 60);
	}

	printf("\n\n");
	for (n = 0; n < ARRAY_SIZE(fragment); n++) {
		brw_disasm(stdout,
			   (const struct brw_instruction *)&fragment[n][0],
			   60);
	}
}
Example #22
0
/**
 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 */
void brw_dp_READ_16( struct brw_compile *p,
		      struct brw_reg dest,
		      GLuint scratch_offset )
{
   GLuint msg_reg_nr = 1;
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
	      brw_imm_d(scratch_offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE; 
      insn->header.destreg__conditionalmod = msg_reg_nr;
  
      brw_set_dest(insn, dest);	/* UW? */
      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));

      brw_set_dp_read_message(p->brw,
			      insn,
			      255, /* binding table index (255=stateless) */
			      3,  /* msg_control (3 means 4 Owords) */
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      1, /* target cache (render/scratch) */
			      1, /* msg_length */
			      2, /* response_length */
			      0); /* eot */
   }
}
static void emit_unspill( struct brw_wm_compile *c,
			  struct brw_reg reg,
			  GLuint slot )
{
   struct brw_compile *p = &c->func;

   /* Slot 0 is the undef value.
    */
   if (slot == 0) {
      brw_MOV(p, reg, brw_imm_f(0));
      return;
   }

   /*
     mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
     send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
   */

   brw_dp_READ_16(p,
		  retype(vec16(reg), BRW_REGISTER_TYPE_UW),
		  1, 
		  slot);
}
Example #24
0
/**
 * Emit a vertex using the URB_WRITE message.  Use the contents of
 * c->reg.header for the message header, and the registers starting at \c vert
 * for the vertex data.
 *
 * If \c last is true, then this is the last vertex, so no further URB space
 * should be allocated, and this message should end the thread.
 *
 * If \c last is false, then a new URB entry will be allocated, and its handle
 * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
 * message.
 */
static void brw_gs_emit_vue(struct brw_gs_compile *c, 
			    struct brw_reg vert,
			    bool last)
{
   struct brw_compile *p = &c->func;
   bool allocate = !last;

   /* Copy the vertex from vertn into m1..mN+1:
    */
   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);

   /* Send each vertex as a seperate write to the urb.  This is
    * different to the concept in brw_sf_emit.c, where subsequent
    * writes are used to build up a single urb entry.  Each of these
    * writes instantiates a seperate urb entry, and a new one must be
    * allocated each time.
    */
   brw_urb_WRITE(p, 
		 allocate ? c->reg.temp
                          : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
		 0,
		 c->reg.header,
		 allocate,
		 1,		/* used */
		 c->nr_regs + 1, /* msg length */
		 allocate ? 1 : 0, /* response length */
		 allocate ? 0 : 1, /* eot */
		 1,		/* writes_complete */
		 0,		/* urb offset */
		 BRW_URB_SWIZZLE_NONE);

   if (allocate) {
      brw_MOV(p, get_element_ud(c->reg.header, 0),
              get_element_ud(c->reg.temp, 0));
   }
}
static void emit_math1( struct brw_compile *p, 
			GLuint function,
			const struct brw_reg *dst,
			GLuint mask,
			const struct brw_reg *arg0 )
{
   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code*/

   //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
   //	  function == BRW_MATH_FUNCTION_SINCOS);
   
   brw_MOV(p, brw_message_reg(2), arg0[0]);

   /* Send two messages to perform all 16 operations:
    */
   brw_math_16(p, 
	       dst[0],
	       function,
	       (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
	       2,
	       brw_null_reg(),
	       BRW_MATH_PRECISION_FULL);
}
Example #26
0
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 */
void
gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_compile *p = &c->func;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_gs_alloc_regs(c, num_verts, true);
   brw_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         brw_MOV(p, destination_indices_uw,
                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                         : 0x00020001)); /* (1, 0, 2) */
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      }
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));

      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            brw_set_access_mode(p, BRW_ALIGN_16);
            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_set_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple “mov” instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_gs_ff_sync(c, 1);

   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
    * release the URB that was just allocated, and terminate the thread.
    */
   if (key->rasterizer_discard) {
      brw_gs_terminate(c);
      return;
   }

   brw_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
Example #27
0
/**
 * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
 *
 * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
 * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
 * need to be able to update on a per-vertex basis.
 */
static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c,
                                        unsigned dw2)
{
   struct brw_compile *p = &c->func;
   brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
}
Example #28
0
/**
 * Set up the initial value of c->reg.header register based on c->reg.R0.
 *
 * The following information is passed to the GS thread in R0, and needs to be
 * included in the first URB_WRITE or FF_SYNC message sent by the GS:
 *
 * - DWORD 0 [31:0] handle info (Gen4 only)
 * - DWORD 5 [7:0] FFTID
 * - DWORD 6 [31:0] Debug info
 * - DWORD 7 [31:0] Debug info
 *
 * This function sets up the above data by copying by copying the contents of
 * R0 to the header register.
 */
static void brw_gs_initialize_header(struct brw_gs_compile *c)
{
   struct brw_compile *p = &c->func;
   brw_MOV(p, c->reg.header, c->reg.R0);
}
Example #29
0
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      pc_coord_replace = calculate_point_sprite_mask(c, i);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc_coord_replace);
	 /* Caculate 1.0/PointWidth */
	 gen4_math(&c->func,
		   c->tmp,
		   BRW_MATH_FUNCTION_INV,
		   0,
		   c->dx0,
		   BRW_MATH_DATA_SCALAR,
		   BRW_MATH_PRECISION_FULL);

	 brw_set_default_access_mode(p, BRW_ALIGN_16);

	 /* dA/dx, dA/dy */
	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
	 } else {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
	 }

	 /* attribute constant offset */
	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
	 } else {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
	 }

	 brw_set_default_access_mode(p, BRW_ALIGN_1);
      }

      if (pc & ~pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
	 brw_MOV(p, c->m3C0, a0); /* constant value */
      }


      set_predicate_control_flag_value(p, c, pc);
      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
		    brw_null_reg(),
		    0,
		    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                    : BRW_URB_WRITE_NO_FLAGS,
		    4, 	/* msg len */
		    0,	/* response len */
		    i*4,	/* urb destination offset */
		    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
Example #30
0
void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->has_flat_shading)
      do_flatshade_line(c);

   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color:
       */
      if (pc_linear) {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));

 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
		
	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);

	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.
	  */
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* urb destination offset */
		       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}