static void emit_math2( struct brw_compile *p, 
			GLuint function,
			const struct brw_reg *dst,
			GLuint mask,
			const struct brw_reg *arg0,
			const struct brw_reg *arg1)
{
   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code*/

   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);

   brw_push_insn_state(p);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, brw_message_reg(2), arg0[0]);
   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, brw_message_reg(3), arg1[0]);
   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));

   
   /* Send two messages to perform all 16 operations:
    */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, 
	    dst[0],
	    function,
	    (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
	    2,
	    brw_null_reg(),
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);

   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_math(p, 
	    offset(dst[0],1),
	    function,
	    (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
	    4,
	    brw_null_reg(),
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);
   
   brw_pop_insn_state(p);
}
Esempio n. 2
0
void emit_math1(struct brw_wm_compile *c,
		GLuint function,
		const struct brw_reg *dst,
		GLuint mask,
		const struct brw_reg *arg0)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
   GLuint saturate = ((mask & SATURATE) ?
		      BRW_MATH_SATURATE_SATURATE :
		      BRW_MATH_SATURATE_NONE);
   struct brw_reg src;

   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code */

   assert(is_power_of_two(mask & WRITEMASK_XYZW));

   if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
			    arg0[0].file != BRW_GENERAL_REGISTER_FILE) ||
			   arg0[0].negate || arg0[0].abs)) {
      /* Gen6 math requires that source and dst horizontal stride be 1,
       * and that the argument be in the GRF.
       *
       * The hardware ignores source modifiers (negate and abs) on math
       * instructions, so we also move to a temp to set those up.
       */
      src = dst[dst_chan];
      brw_MOV(p, src, arg0[0]);
   } else {
      src = arg0[0];
   }

   /* Send two messages to perform all 16 operations:
    */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p,
	    dst[dst_chan],
	    function,
	    saturate,
	    2,
	    src,
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p,
	       offset(dst[dst_chan],1),
	       function,
	       saturate,
	       3,
	       sechalf(src),
	       BRW_MATH_DATA_VECTOR,
	       BRW_MATH_PRECISION_FULL);
   }
   brw_pop_insn_state(p);
}
Esempio n. 3
0
void emit_math2(struct brw_wm_compile *c,
		GLuint function,
		const struct brw_reg *dst,
		GLuint mask,
		const struct brw_reg *arg0,
		const struct brw_reg *arg1)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;

   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code */

   assert(is_power_of_two(mask & WRITEMASK_XYZW));

   brw_push_insn_state(p);

   /* math can only operate on up to a vec8 at a time, so in
    * dispatch_width==16 we have to do the second half manually.
    */
   if (intel->gen >= 6) {
      struct brw_reg src0 = arg0[0];
      struct brw_reg src1 = arg1[0];
      struct brw_reg temp_dst = dst[dst_chan];

      if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
	 brw_MOV(p, temp_dst, src0);
	 src0 = temp_dst;
      }

      if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
	 /* This is a heinous hack to get a temporary register for use
	  * in case both arg0 and arg1 are constants.  Why you're
	  * doing exponentiation on constant values in the shader, we
	  * don't know.
	  *
	  * max_wm_grf is almost surely less than the maximum GRF, and
	  * gen6 doesn't care about the number of GRFs used in a
	  * shader like pre-gen6 did.
	  */
	 struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
	 brw_MOV(p, temp, src1);
	 src1 = temp;
      }

      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math2(p,
		temp_dst,
		function,
		src0,
		src1);
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_math2(p,
		   sechalf(temp_dst),
		   function,
		   sechalf(src0),
		   sechalf(src1));
      }
   } else {
      GLuint saturate = ((mask & SATURATE) ?
			 BRW_MATH_SATURATE_SATURATE :
			 BRW_MATH_SATURATE_NONE);

      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, brw_message_reg(3), arg1[0]);
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
      }

      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math(p,
	       dst[dst_chan],
	       function,
	       saturate,
	       2,
	       arg0[0],
	       BRW_MATH_DATA_VECTOR,
	       BRW_MATH_PRECISION_FULL);

      /* Send two messages to perform all 16 operations:
       */
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_math(p,
		  offset(dst[dst_chan],1),
		  function,
		  saturate,
		  4,
		  sechalf(arg0[0]),
		  BRW_MATH_DATA_VECTOR,
		  BRW_MATH_PRECISION_FULL);
      }
   }
   brw_pop_insn_state(p);
}
Esempio n. 4
0
/* Post-fragment-program processing.  Send the results to the
 * framebuffer.
 * \param arg0  the fragment color
 * \param arg1  the pass-through depth value
 * \param arg2  the shader-computed depth value
 */
void emit_fb_write(struct brw_wm_compile *c,
		   struct brw_reg *arg0,
		   struct brw_reg *arg1,
		   struct brw_reg *arg2,
		   GLuint target,
		   GLuint eot)
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   GLuint nr = 2;
   GLuint channel;

   /* Reserve a space for AA - may not be needed:
    */
   if (c->aa_dest_stencil_reg)
      nr += 1;

   /* I don't really understand how this achieves the color interleave
    * (ie RGBARGBA) in the result:  [Do the saturation here]
    */
   brw_push_insn_state(p);

   if (c->key.clamp_fragment_color)
      brw_set_saturate(p, 1);

   for (channel = 0; channel < 4; channel++) {
      if (intel->gen >= 6) {
	 /* gen6 SIMD16 single source DP write looks like:
	  * m + 0: r0
	  * m + 1: r1
	  * m + 2: g0
	  * m + 3: g1
	  * m + 4: b0
	  * m + 5: b1
	  * m + 6: a0
	  * m + 7: a1
	  */
	 if (c->dispatch_width == 16) {
	    brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]);
	 } else {
	    brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]);
	 }
      } else if (c->dispatch_width == 16 && brw->has_compr4) {
	 /* pre-gen6 SIMD16 single source DP write looks like:
	  * m + 0: r0
	  * m + 1: g0
	  * m + 2: b0
	  * m + 3: a0
	  * m + 4: r1
	  * m + 5: g1
	  * m + 6: b1
	  * m + 7: a1
	  *
	  * By setting the high bit of the MRF register number, we indicate
	  * that we want COMPR4 mode - instead of doing the usual destination
	  * + 1 for the second half we get destination + 4.
	  */
	 brw_MOV(p,
		 brw_message_reg(nr + channel + BRW_MRF_COMPR4),
		 arg0[channel]);
      } else {
	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_MOV(p,
		 brw_message_reg(nr + channel),
		 arg0[channel]);

	 if (c->dispatch_width == 16) {
	    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	    brw_MOV(p,
		    brw_message_reg(nr + channel + 4),
		    sechalf(arg0[channel]));
	 }
      }
   }

   brw_set_saturate(p, 0);

   /* skip over the regs populated above:
    */
   if (c->dispatch_width == 16)
      nr += 8;
   else
      nr += 4;

   brw_pop_insn_state(p);

   if (c->source_depth_to_render_target)
   {
      if (c->computes_depth)
	 brw_MOV(p, brw_message_reg(nr), arg2[2]);
      else 
	 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */

      nr += 2;
   }

   if (c->dest_depth_reg)
   {
      GLuint comp = c->dest_depth_reg / 2;
      GLuint off = c->dest_depth_reg % 2;

      if (off != 0) {
         brw_push_insn_state(p);
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
         /* 2nd half? */
         brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
         brw_pop_insn_state(p);
      }
      else {
         brw_MOV(p, brw_message_reg(nr), arg1[comp]);
      }
      nr += 2;
   }

   if (intel->gen >= 6) {
      /* Load the message header.  There's no implied move from src0
       * to the base mrf on gen6.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);

      if (target != 0) {
	 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
					0,
					2), BRW_REGISTER_TYPE_UD),
		 brw_imm_ud(target));
      }
   }

   if (!c->runtime_check_aads_emit) {
      if (c->aa_dest_stencil_reg)
	 emit_aa(c, arg1, 2);

      fire_fb_write(c, 0, nr, target, eot);
   }
   else {
      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      struct brw_reg ip = brw_ip_reg();
      struct brw_instruction *jmp;
      
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
      brw_AND(p, 
	      v1_null_ud, 
	      get_element_ud(brw_vec8_grf(1,0), 6), 
	      brw_imm_ud(1<<26)); 

      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
      {
	 emit_aa(c, arg1, 2);
	 fire_fb_write(c, 0, nr, target, eot);
	 /* note - thread killed in subroutine */
      }
      brw_land_fwd_jump(p, jmp);

      /* ELSE: Shuffle up one register to fill in the hole left for AA:
       */
      fire_fb_write(c, 1, nr-1, target, eot);
   }
}
/* Post-fragment-program processing.  Send the results to the
 * framebuffer.
 */
static void emit_fb_write( struct brw_wm_compile *c,
			   struct brw_reg *arg0,
			   struct brw_reg *arg1,
			   struct brw_reg *arg2,
			   GLuint target,
			   GLuint eot)
{
   struct brw_compile *p = &c->func;
   GLuint nr = 2;
   GLuint channel;

   /* Reserve a space for AA - may not be needed:
    */
   if (c->key.aa_dest_stencil_reg)
      nr += 1;

   /* I don't really understand how this achieves the color interleave
    * (ie RGBARGBA) in the result:  [Do the saturation here]
    */
   {
      brw_push_insn_state(p);
      
      for (channel = 0; channel < 4; channel++) {
	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_MOV(p,
		 brw_message_reg(nr + channel),
		 arg0[channel]);
       
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_MOV(p,
		 brw_message_reg(nr + channel + 4),
		 sechalf(arg0[channel]));
      }

      /* skip over the regs populated above:
       */
      nr += 8;
   
      brw_pop_insn_state(p);
   }

   if (c->key.source_depth_to_render_target)
   {
      if (c->key.computes_depth) 
	 brw_MOV(p, brw_message_reg(nr), arg2[2]);
      else 
	 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */

      nr += 2;
   }

   if (c->key.dest_depth_reg)
   {
      GLuint comp = c->key.dest_depth_reg / 2;
      GLuint off = c->key.dest_depth_reg % 2;

      if (off != 0) {
         brw_push_insn_state(p);
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
         /* 2nd half? */
         brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
         brw_pop_insn_state(p);
      }
      else {
         brw_MOV(p, brw_message_reg(nr), arg1[comp]);
      }
      nr += 2;
   }


   if (!c->key.runtime_check_aads_emit) {
      if (c->key.aa_dest_stencil_reg)
	 emit_aa(c, arg1, 2);

      fire_fb_write(c, 0, nr, target, eot);
   }
   else {
      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      struct brw_reg ip = brw_ip_reg();
      struct brw_instruction *jmp;
      
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
      brw_AND(p, 
	      v1_null_ud, 
	      get_element_ud(brw_vec8_grf(1,0), 6), 
	      brw_imm_ud(1<<26)); 

      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
      {
	 emit_aa(c, arg1, 2);
	 fire_fb_write(c, 0, nr, target, eot);
	 /* note - thread killed in subroutine */
      }
      brw_land_fwd_jump(p, jmp);

      /* ELSE: Shuffle up one register to fill in the hole left for AA:
       */
      fire_fb_write(c, 1, nr-1, target, eot);
   }
}