Esempio n. 1
0
void
gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
                                                       struct brw_reg index)
{
   int second_vertex_offset = 1;

   m1 = retype(m1, BRW_REGISTER_TYPE_D);

   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
    * M1.4 are used, and the rest are ignored.
    */
   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
   struct brw_reg index_0 = suboffset(vec1(index), 0);
   struct brw_reg index_4 = suboffset(vec1(index), 4);

   default_state.mask_control = BRW_MASK_DISABLE;
   default_state.access_mode = BRW_ALIGN_1;

   MOV(m1_0, index_0);

   if (index.file == BRW_IMMEDIATE_VALUE) {
      index_4.dw1.ud += second_vertex_offset;
      MOV(m1_4, index_4);
   } else {
      ADD(m1_4, index_4, brw_imm_d(second_vertex_offset));
   }

   default_state.mask_control = BRW_MASK_ENABLE;
   default_state.access_mode = BRW_ALIGN_16;
}
static void emit_pixel_xy(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_reg r1 = brw_vec1_grf(1, 0);
    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);

    struct brw_reg dst0, dst1;
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    dst0 = get_dst_reg(c, inst, 0, 1);
    dst1 = get_dst_reg(c, inst, 1, 1);
    /* Calculate pixel centers by adding 1 or 0 to each of the
     * micro-tile coordinates passed in r1.
     */
    if (mask & WRITEMASK_X) {
	brw_ADD(p,
		vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
		stride(suboffset(r1_uw, 4), 2, 4, 0),
		brw_imm_v(0x10101010));
    }

    if (mask & WRITEMASK_Y) {
	brw_ADD(p,
		vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
		stride(suboffset(r1_uw, 5), 2, 4, 0),
		brw_imm_v(0x11001100));
    }

}
static void emit_pixel_xy(struct brw_compile *p,
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0)
{
   struct brw_reg r1 = brw_vec1_grf(1, 0);
   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
	      stride(suboffset(r1_uw, 4), 2, 4, 0),
	      brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
	      stride(suboffset(r1_uw,5), 2, 4, 0),
	      brw_imm_v(0x11001100));
   }

   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
static void emit_pinterp( struct brw_compile *p, 
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0,
			  const struct brw_reg *deltas,
			  const struct brw_reg *w)
{
   struct brw_reg interp[4];
   GLuint nr = arg0[0].nr;
   GLuint i;

   interp[0] = brw_vec1_grf(nr, 0);
   interp[1] = brw_vec1_grf(nr, 4);
   interp[2] = brw_vec1_grf(nr+1, 0);
   interp[3] = brw_vec1_grf(nr+1, 4);

   for(i = 0; i < 4; i++ ) {
      if (mask & (1<<i)) {
	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
      }
   }
   for(i = 0; i < 4; i++ ) {
      if (mask & (1<<i)) {
	 brw_MUL(p, dst[i], dst[i], w[3]);
      }
   }
}
static void emit_pixel_w( struct brw_compile *p,
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0,
			  const struct brw_reg *deltas)
{
   /* Don't need this if all you are doing is interpolating color, for
    * instance.
    */
   if (mask & WRITEMASK_W) {      
      struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);

      /* Calc 1/w - just linterp wpos[3] optimized by putting the
       * result straight into a message reg.
       */
      brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);

      /* Calc w */
      brw_math_16( p, dst[3],
		   BRW_MATH_FUNCTION_INV,
		   BRW_MATH_SATURATE_NONE,
		   2, brw_null_reg(),
		   BRW_MATH_PRECISION_FULL);
   }
}
static void emit_ddy(struct brw_wm_compile *c,
                struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg interp[4];
    struct brw_reg dst;
    struct brw_reg src0, w;
    GLuint nr, i;

    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
    nr = src0.nr;
    w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
    interp[0] = brw_vec1_grf(nr, 0);
    interp[1] = brw_vec1_grf(nr, 4);
    interp[2] = brw_vec1_grf(nr+1, 0);
    interp[3] = brw_vec1_grf(nr+1, 4);
    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
    for(i = 0; i < 4; i++ ) {
        if (mask & (1<<i)) {
            dst = get_dst_reg(c, inst, i, 1);
            brw_MOV(p, dst, suboffset(interp[i], 1));
            brw_MUL(p, dst, dst, w);
        }
    }
    brw_set_saturate(p, 0);
}
static void emit_pinterp(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    struct brw_reg interp[4];
    struct brw_reg dst, delta0, delta1;
    struct brw_reg src0, w;

    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
    delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
    delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
    w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
    GLuint nr = src0.nr;
    int i;

    interp[0] = brw_vec1_grf(nr, 0);
    interp[1] = brw_vec1_grf(nr, 4);
    interp[2] = brw_vec1_grf(nr+1, 0);
    interp[3] = brw_vec1_grf(nr+1, 4);

    for(i = 0; i < 4; i++ ) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    brw_LINE(p, brw_null_reg(), interp[i], delta0);
	    brw_MAC(p, dst, suboffset(interp[i],1), 
		    delta1);
	    brw_MUL(p, dst, dst, w);
	}
    }
}
static void emit_delta_xy(struct brw_compile *p,
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0,
			  const struct brw_reg *arg1)
{
   struct brw_reg r1 = brw_vec1_grf(1, 0);

   /* Calc delta X,Y by subtracting origin in r1 from the pixel
    * centers.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      dst[0],
	      retype(arg0[0], BRW_REGISTER_TYPE_UW),
	      negate(r1));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      dst[1],
	      retype(arg0[1], BRW_REGISTER_TYPE_UW),
	      negate(suboffset(r1,1)));

   }
}
static void emit_delta_xy(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_reg r1 = brw_vec1_grf(1, 0);
    struct brw_reg dst0, dst1, src0, src1;
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    dst0 = get_dst_reg(c, inst, 0, 1);
    dst1 = get_dst_reg(c, inst, 1, 1);
    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
    src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
    /* Calc delta X,Y by subtracting origin in r1 from the pixel
     * centers.
     */
    if (mask & WRITEMASK_X) {
	brw_ADD(p,
		dst0,
		retype(src0, BRW_REGISTER_TYPE_UW),
		negate(r1));
    }

    if (mask & WRITEMASK_Y) {
	brw_ADD(p,
		dst1,
		retype(src1, BRW_REGISTER_TYPE_UW),
		negate(suboffset(r1,1)));

    }

}
static void emit_pixel_w( struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    if (mask & WRITEMASK_W) {
	struct brw_reg dst, src0, delta0, delta1;
	struct brw_reg interp3;

	dst = get_dst_reg(c, inst, 3, 1);
	src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
	delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
	delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);

	interp3 = brw_vec1_grf(src0.nr+1, 4);
	/* Calc 1/w - just linterp wpos[3] optimized by putting the
	 * result straight into a message reg.
	 */
	brw_LINE(p, brw_null_reg(), interp3, delta0);
	brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);

	/* Calc w */
	brw_math_16( p, dst,
		BRW_MATH_FUNCTION_INV,
		BRW_MATH_SATURATE_NONE,
		2, brw_null_reg(),
		BRW_MATH_PRECISION_FULL);
    }
}
Esempio n. 11
0
void
vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
        struct brw_reg src0,
        struct brw_reg src1)
{
    /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
     * Header: M0.3):
     *
     *     Slot 0 Offset. This field, after adding to the Global Offset field
     *     in the message descriptor, specifies the offset (in 256-bit units)
     *     from the start of the URB entry, as referenced by URB Handle 0, at
     *     which the data will be accessed.
     *
     * Similar text describes DWORD M0.4, which is slot 1 offset.
     *
     * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
     * of the register for geometry shader invocations 0 and 1) by the
     * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
     *
     * We can do this with the following EU instruction:
     *
     *     mul(2) dst.3<1>UD src0<8;2,4>UD src1   { Align1 WE_all }
     */
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
            src1);
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
Esempio n. 12
0
void
vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
        struct brw_reg src)
{
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);

    /* If we think of the src and dst registers as composed of 8 DWORDs each,
     * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
     * them to WORDs, and then pack them into DWORD 2 of dst.
     *
     * It's easier to get the EU to do this if we think of the src and dst
     * registers as composed of 16 WORDS each; then, we want to pick up the
     * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 of
     * dst.
     *
     * We can do that by the following EU instruction:
     *
     *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
     */
    brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
            stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
static void emit_cinterp(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    struct brw_reg interp[4];
    struct brw_reg dst, src0;

    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
    GLuint nr = src0.nr;
    int i;

    interp[0] = brw_vec1_grf(nr, 0);
    interp[1] = brw_vec1_grf(nr, 4);
    interp[2] = brw_vec1_grf(nr+1, 0);
    interp[3] = brw_vec1_grf(nr+1, 4);

    for(i = 0; i < 4; i++ ) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    brw_MOV(p, dst, suboffset(interp[i],3));
	}
    }
}
Esempio n. 14
0
void emit_linterp(struct brw_compile *p,
		  const struct brw_reg *dst,
		  GLuint mask,
		  const struct brw_reg *arg0,
		  const struct brw_reg *deltas)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg interp[4];
   GLuint nr = arg0[0].nr;
   GLuint i;

   interp[0] = brw_vec1_grf(nr, 0);
   interp[1] = brw_vec1_grf(nr, 4);
   interp[2] = brw_vec1_grf(nr+1, 0);
   interp[3] = brw_vec1_grf(nr+1, 4);

   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
	 if (intel->gen >= 6) {
	    brw_PLN(p, dst[i], interp[i], brw_vec8_grf(2, 0));
	 } else if (can_do_pln(intel, deltas)) {
	    brw_PLN(p, dst[i], interp[i], deltas[0]);
	 } else {
	    brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
	    brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
	 }
      }
   }
}
Esempio n. 15
0
void
vec4_generator::generate_unpack_flags(vec4_instruction *inst,
                                      struct brw_reg dst)
{
    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_access_mode(p, BRW_ALIGN_1);

    struct brw_reg flags = brw_flag_reg(0, 0);
    struct brw_reg dst_0 = suboffset(vec1(dst), 0);
    struct brw_reg dst_4 = suboffset(vec1(dst), 4);

    brw_AND(p, dst_0, flags, brw_imm_ud(0x0f));
    brw_AND(p, dst_4, flags, brw_imm_ud(0xf0));
    brw_SHR(p, dst_4, dst_4, brw_imm_ud(4));

    brw_pop_insn_state(p);
}
Esempio n. 16
0
void emit_pixel_w(struct brw_wm_compile *c,
		  const struct brw_reg *dst,
		  GLuint mask,
		  const struct brw_reg *arg0,
		  const struct brw_reg *deltas)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src;
   struct brw_reg temp_dst;

   if (intel->gen >= 6)
	temp_dst = dst[3];
   else
	temp_dst = brw_message_reg(2);

   assert(intel->gen < 6);

   /* Don't need this if all you are doing is interpolating color, for
    * instance.
    */
   if (mask & WRITEMASK_W) {      
      struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);

      /* Calc 1/w - just linterp wpos[3] optimized by putting the
       * result straight into a message reg.
       */
      if (can_do_pln(intel, deltas)) {
	 brw_PLN(p, temp_dst, interp3, deltas[0]);
      } else {
	 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
	 brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]);
      }

      /* Calc w */
      if (intel->gen >= 6)
	 src = temp_dst;
      else
	 src = brw_null_reg();

      if (c->dispatch_width == 16) {
	 brw_math_16(p, dst[3],
		     BRW_MATH_FUNCTION_INV,
		     BRW_MATH_SATURATE_NONE,
		     2, src,
		     BRW_MATH_PRECISION_FULL);
      } else {
	 brw_math(p, dst[3],
		  BRW_MATH_FUNCTION_INV,
		  BRW_MATH_SATURATE_NONE,
		  2, src,
		  BRW_MATH_DATA_VECTOR,
		  BRW_MATH_PRECISION_FULL);
      }
   }
}
Esempio n. 17
0
static void copy_z_inv_w( struct brw_sf_compile *c )
{
   struct brw_compile *p = &c->func;
   GLuint i;

   /* Copy both scalars with a single MOV:
    */
   for (i = 0; i < c->nr_verts; i++)
      brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
}
Esempio n. 18
0
void
gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
                                                   struct brw_reg src)
{
   assert(src.file == BRW_IMMEDIATE_VALUE);

   default_state.access_mode = BRW_ALIGN_1;

   gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src);
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);

   default_state.access_mode = BRW_ALIGN_16;
}
Esempio n. 19
0
void
vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
        struct brw_reg src)
{
    assert(src.file == BRW_IMMEDIATE_VALUE);

    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_MOV(p, suboffset(vec1(dst), 2), src);
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
Esempio n. 20
0
/**
 * Computes the screen-space x,y position of the pixels.
 *
 * This will be used by emit_delta_xy() or emit_wpos_xy() for
 * interpolation of attributes..
 *
 * Payload R0:
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 * R0.1..8 -- ?
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 * R1.6 -- ?
 * R1.7 -- ?
 * R1.8 -- ?
 */
void emit_pixel_xy(struct brw_wm_compile *c,
		   const struct brw_reg *dst,
		   GLuint mask)
{
   struct brw_compile *p = &c->func;
   struct brw_reg r1 = brw_vec1_grf(1, 0);
   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
   struct brw_reg dst0_uw, dst1_uw;

   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (c->dispatch_width == 16) {
      dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
      dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
   } else {
      dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
      dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
   }

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      dst0_uw,
	      stride(suboffset(r1_uw, 4), 2, 4, 0),
	      brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      dst1_uw,
	      stride(suboffset(r1_uw,5), 2, 4, 0),
	      brw_imm_v(0x11001100));
   }
   brw_pop_insn_state(p);
}
Esempio n. 21
0
void
vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
        struct brw_reg index)
{
    int second_vertex_offset;

    if (brw->gen >= 6)
        second_vertex_offset = 1;
    else
        second_vertex_offset = 16;

    m1 = retype(m1, BRW_REGISTER_TYPE_D);

    /* Set up M1 (message payload).  Only the block offsets in M1.0 and
     * M1.4 are used, and the rest are ignored.
     */
    struct brw_reg m1_0 = suboffset(vec1(m1), 0);
    struct brw_reg m1_4 = suboffset(vec1(m1), 4);
    struct brw_reg index_0 = suboffset(vec1(index), 0);
    struct brw_reg index_4 = suboffset(vec1(index), 4);

    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_access_mode(p, BRW_ALIGN_1);

    brw_MOV(p, m1_0, index_0);

    if (index.file == BRW_IMMEDIATE_VALUE) {
        index_4.dw1.ud += second_vertex_offset;
        brw_MOV(p, m1_4, index_4);
    } else {
        brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
    }

    brw_pop_insn_state(p);
}
Esempio n. 22
0
void
gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
{
   /* We want to left shift just DWORD 4 (the x component belonging to the
    * second geometry shader invocation) by 4 bits.  So generate the
    * instruction:
    *
    *     shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
    */
   dst = suboffset(vec1(dst), 4);
   default_state.access_mode = BRW_ALIGN_1;
   gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;
}
Esempio n. 23
0
void emit_cinterp(struct brw_compile *p,
		  const struct brw_reg *dst,
		  GLuint mask,
		  const struct brw_reg *arg0)
{
   struct brw_reg interp[4];
   GLuint nr = arg0[0].nr;
   GLuint i;

   interp[0] = brw_vec1_grf(nr, 0);
   interp[1] = brw_vec1_grf(nr, 4);
   interp[2] = brw_vec1_grf(nr+1, 0);
   interp[3] = brw_vec1_grf(nr+1, 4);

   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
         brw_MOV(p, dst[i], suboffset(interp[i],3));	/* TODO: optimize away like other moves */
      }
   }
}
Esempio n. 24
0
/**
 * Computes the screen-space x,y distance of the pixels from the start
 * vertex.
 *
 * This will be used in linterp or pinterp with the start vertex value
 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
 * to produce interpolated attribute values.
 */
void emit_delta_xy(struct brw_compile *p,
		   const struct brw_reg *dst,
		   GLuint mask,
		   const struct brw_reg *arg0)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg r1 = brw_vec1_grf(1, 0);

   if (mask == 0)
      return;

   assert(mask == WRITEMASK_XY);

   if (intel->gen >= 6) {
       /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1.
	  Just add them with 0.0 for dst reg.. */
       r1 = brw_imm_v(0x00000000);
       brw_ADD(p,
	       dst[0],
	       retype(arg0[0], BRW_REGISTER_TYPE_UW),
	       r1);
       brw_ADD(p,
	       dst[1],
	       retype(arg0[1], BRW_REGISTER_TYPE_UW),
	       r1);
       return;
   }

   /* Calc delta X,Y by subtracting origin in r1 from the pixel
    * centers produced by emit_pixel_xy().
    */
   brw_ADD(p,
	   dst[0],
	   retype(arg0[0], BRW_REGISTER_TYPE_UW),
	   negate(r1));
   brw_ADD(p,
	   dst[1],
	   retype(arg0[1], BRW_REGISTER_TYPE_UW),
	   negate(suboffset(r1,1)));
}
/* Interpolate between two vertices and put the result into a0.0.
 * Increment a0.0 accordingly.
 *
 * Beware that dest_ptr can be equal to v0_ptr!
 */
void brw_clip_interp_vertex( struct brw_clip_compile *c,
			     struct brw_indirect dest_ptr,
			     struct brw_indirect v0_ptr, /* from */
			     struct brw_indirect v1_ptr, /* to */
			     struct brw_reg t0,
			     bool force_edgeflag)
{
   struct brw_codegen *p = &c->func;
   struct brw_reg t_nopersp, v0_ndc_copy;
   GLuint slot;

   /* Just copy the vertex header:
    */
   /*
    * After CLIP stage, only first 256 bits of the VUE are read
    * back on Ironlake, so needn't change it
    */
   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);


   /* First handle the 3D and NDC interpolation, in case we
    * need noperspective interpolation. Doing it early has no
    * performance impact in any case.
    */

   /* Take a copy of the v0 NDC coordinates, in case dest == v0. */
   if (c->has_noperspective_shading) {
      GLuint offset = brw_varying_to_offset(&c->vue_map,
                                                 BRW_VARYING_SLOT_NDC);
      v0_ndc_copy = get_tmp(c);
      brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset));
   }

   /* Compute the new 3D position
    *
    * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0
    */
   {
      GLuint delta = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
      struct brw_reg tmp = get_tmp(c);
      brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0);
      brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0);
      brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp);
      release_tmp(c, tmp);
   }

   /* Recreate the projected (NDC) coordinate in the new vertex header */
   brw_clip_project_vertex(c, dest_ptr);

   /* If we have noperspective attributes,
    * we need to compute the screen-space t
    */
   if (c->has_noperspective_shading) {
      GLuint delta = brw_varying_to_offset(&c->vue_map,
                                                BRW_VARYING_SLOT_NDC);
      struct brw_reg tmp = get_tmp(c);
      t_nopersp = get_tmp(c);

      /* t_nopersp = vec4(v1.xy, dest.xy) */
      brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));
      brw_MOV(p, tmp, deref_4f(dest_ptr, delta));
      brw_set_default_access_mode(p, BRW_ALIGN_16);
      brw_MOV(p,
              brw_writemask(t_nopersp, WRITEMASK_ZW),
              brw_swizzle(tmp, 0, 1, 0, 1));

      /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */
      brw_ADD(p, t_nopersp, t_nopersp,
              negate(brw_swizzle(v0_ndc_copy, 0, 1, 0, 1)));

      /* Add the absolute values of the X and Y deltas so that if
       * the points aren't in the same place on the screen we get
       * nonzero values to divide.
       *
       * After that, we have vert1 - vert0 in t_nopersp.x and
       * vertnew - vert0 in t_nopersp.y
       *
       * t_nopersp = vec2(|v1.x  -v0.x| + |v1.y  -v0.y|,
       *                  |dest.x-v0.x| + |dest.y-v0.y|)
       */
      brw_ADD(p,
              brw_writemask(t_nopersp, WRITEMASK_XY),
              brw_abs(brw_swizzle(t_nopersp, 0, 2, 0, 0)),
              brw_abs(brw_swizzle(t_nopersp, 1, 3, 0, 0)));
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* If the points are in the same place, just substitute a
       * value to avoid divide-by-zero
       */
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,
              vec1(t_nopersp),
              brw_imm_f(0));
      brw_IF(p, BRW_EXECUTE_1);
      brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0),
                                        brw_float_to_vf(0.0),
                                        brw_float_to_vf(0.0),
                                        brw_float_to_vf(0.0)));
      brw_ENDIF(p);

      /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
      brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0));
      brw_MUL(p, vec1(t_nopersp), vec1(t_nopersp),
            vec1(suboffset(t_nopersp, 1)));
      brw_set_default_access_mode(p, BRW_ALIGN_16);
      brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, 0, 0, 0, 0));
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      release_tmp(c, tmp);
      release_tmp(c, v0_ndc_copy);
   }

   /* Now we can iterate over each attribute
    * (could be done in pairs?)
    */
   for (slot = 0; slot < c->vue_map.num_slots; slot++) {
      int varying = c->vue_map.slot_to_varying[slot];
      GLuint delta = brw_vue_slot_to_offset(slot);

      /* HPOS, NDC already handled above */
      if (varying == VARYING_SLOT_POS || varying == BRW_VARYING_SLOT_NDC)
         continue;


      if (varying == VARYING_SLOT_EDGE) {
	 if (force_edgeflag)
	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
	 else
	    brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
      } else if (varying == VARYING_SLOT_PSIZ) {
         /* PSIZ doesn't need interpolation because it isn't used by the
          * fragment shader.
          */
      } else if (varying < VARYING_SLOT_MAX) {
	 /* This is a true vertex result (and not a special value for the VUE
	  * header), so interpolate:
	  *
	  *        New = attr0 + t*attr1 - t*attr0
          *
          * Unless the attribute is flat shaded -- in which case just copy
          * from one of the sources (doesn't matter which; already copied from pv)
	  */
         GLuint interp = c->key.interpolation_mode.mode[slot];

         if (interp != INTERP_QUALIFIER_FLAT) {
            struct brw_reg tmp = get_tmp(c);
            struct brw_reg t =
               interp == INTERP_QUALIFIER_NOPERSPECTIVE ? t_nopersp : t0;

            brw_MUL(p,
                  vec4(brw_null_reg()),
                  deref_4f(v1_ptr, delta),
                  t);

            brw_MAC(p,
                  tmp,
                  negate(deref_4f(v0_ptr, delta)),
                  t);

            brw_ADD(p,
                  deref_4f(dest_ptr, delta),
                  deref_4f(v0_ptr, delta),
                  tmp);

            release_tmp(c, tmp);
         }
         else {
            brw_MOV(p,
                  deref_4f(dest_ptr, delta),
                  deref_4f(v0_ptr, delta));
         }
      }
   }

   if (c->vue_map.num_slots % 2) {
      GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);

      brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
   }

   if (c->has_noperspective_shading)
      release_tmp(c, t_nopersp);
}
Esempio n. 26
0
void
vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
        struct brw_reg src)
{
    /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
     * Header: M0.5):
     *
     *     15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
     *
     *        When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
     *        DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
     *        Vertex 0 DATA[7].  This bit is ANDed with the corresponding
     *        channel enable to determine the final channel enable.  For the
     *        URB_READ_OWORD & URB_READ_HWORD messages, when final channel
     *        enable is 1 it indicates that Vertex 1 DATA [3] will be included
     *        in the writeback message.  For the URB_WRITE_OWORD &
     *        URB_WRITE_HWORD messages, when final channel enable is 1 it
     *        indicates that Vertex 1 DATA [3] will be written to the surface.
     *
     *        0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
     *        1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
     *
     *     14 Vertex 1 DATA [2] Channel Mask
     *     13 Vertex 1 DATA [1] Channel Mask
     *     12 Vertex 1 DATA [0] Channel Mask
     *     11 Vertex 0 DATA [3] Channel Mask
     *     10 Vertex 0 DATA [2] Channel Mask
     *      9 Vertex 0 DATA [1] Channel Mask
     *      8 Vertex 0 DATA [0] Channel Mask
     *
     * (This is from a section of the PRM that is agnostic to the particular
     * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
     * geometry shader invocations 0 and 1, respectively).  Since we have the
     * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
     * and the enable flags for geometry shader invocation 1 in bits 7:0 of
     * DWORD 4, we just need to OR them together and store the result in bits
     * 15:8 of DWORD 5.
     *
     * It's easier to get the EU to do this if we think of the src and dst
     * registers as composed of 32 bytes each; then, we want to pick up the
     * contents of bytes 0 and 16 from src, OR them together, and store them in
     * byte 21.
     *
     * We can do that by the following EU instruction:
     *
     *     or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
     *
     * Note: this relies on the source register having zeros in (a) bits 7:4 of
     * DWORD 0 and (b) bits 3:0 of DWORD 4.  We can rely on (b) because the
     * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
     * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
     * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
     * contain valid channel mask values (which are in the range 0x0-0xf).
     */
    dst = retype(dst, BRW_REGISTER_TYPE_UB);
    src = retype(src, BRW_REGISTER_TYPE_UB);
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
    brw_pop_insn_state(p);
}
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear;
      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
            
      if (pc_persp)
      {				
	  if (!tex->CoordReplace) {
	      brw_set_predicate_control_flag_value(p, pc_persp);
	      brw_MUL(p, a0, a0, c->inv_w[0]);
	  }
      }

      if (tex->CoordReplace) {
	  /* Caculate 1.0/PointWidth */
	  brw_math(&c->func,
		  c->tmp,
		  BRW_MATH_FUNCTION_INV,
		  BRW_MATH_SATURATE_NONE,
		  0,
		  c->dx0,
		  BRW_MATH_DATA_SCALAR,
		  BRW_MATH_PRECISION_FULL);

	  if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
	  	brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
	  } else {
	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
	  	brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
	  }
      } else {
	  brw_MOV(p, c->m1Cx, brw_imm_ud(0));
	  brw_MOV(p, c->m2Cy, brw_imm_ud(0));
      }

      {
	 brw_set_predicate_control_flag_value(p, pc); 
	 if (tex->CoordReplace) {
	     if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
		 brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
		 brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
	     }
	     else
		 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
	 } else {
	 	brw_MOV(p, c->m3C0, a0); /* constant value */
	 }

	 /* Copy m0..m3 to URB. 
	  */
	 brw_urb_WRITE(p, 
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0),
		       0, 	/* allocate */
		       1,	/* used */
		       4, 	/* msg len */
		       0,	/* response len */
		       last, 	/* eot */
		       last, 	/* writes complete */
		       i*4,	/* urb destination offset */
		       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }
}
Esempio n. 28
0
/* called at end of declarations section to process chains
   created by EQUIVALENCE statements
 */
 void
doequiv(Void)
{
	register int i;
	int inequiv;			/* True if one namep occurs in
					   several EQUIV declarations */
	int comno;		/* Index into Extsym table of the last
				   COMMON block seen (implicitly assuming
				   that only one will be given) */
	int ovarno;
	ftnint comoffset;	/* Index into the COMMON block */
	ftnint offset;		/* Offset from array base */
	ftnint leng;
	register struct Equivblock *equivdecl;
	register struct Eqvchain *q;
	struct Primblock *primp;
	register Namep np;
	int k, k1, ns, pref, t;
	chainp cp;
	extern int type_pref[];
	char *s;

	for(i = 0 ; i < nequiv ; ++i)
	{

/* Handle each equivalence declaration */

		equivdecl = &eqvclass[i];
		equivdecl->eqvbottom = equivdecl->eqvtop = 0;
		comno = -1;



		for(q = equivdecl->equivs ; q ; q = q->eqvnextp)
		{
			offset = 0;
			if (!(primp = q->eqvitem.eqvlhs))
				continue;
			vardcl(np = primp->namep);
			if(primp->argsp || primp->fcharp)
			{
				expptr offp;

/* Pad ones onto the end of an array declaration when needed */

				if(np->vdim!=NULL && np->vdim->ndim>1 &&
				    nsubs(primp->argsp)==1 )
				{
					if(! ftn66flag)
						warni
			("1-dim subscript in EQUIVALENCE, %d-dim declared",
						    np -> vdim -> ndim);
					cp = NULL;
					ns = np->vdim->ndim;
					while(--ns > 0)
						cp = mkchain((char *)ICON(1), cp);
					primp->argsp->listp->nextp = cp;
				}

				offp = suboffset(primp);
				if(ISICON(offp))
					offset = offp->constblock.Const.ci;
				else	{
					dclerr
			("nonconstant subscript in equivalence ",
					    np);
					np = NULL;
				}
				frexpr(offp);
			}

/* Free up the primblock, since we now have a hash table (Namep) entry */

			frexpr((expptr)primp);

			if(np && (leng = iarrlen(np))<0)
			{
				dclerr("adjustable in equivalence", np);
				np = NULL;
			}

			if(np) switch(np->vstg)
			{
			case STGUNKNOWN:
			case STGBSS:
			case STGEQUIV:
				break;

			case STGCOMMON:

/* The code assumes that all COMMON references in a given EQUIVALENCE will
   be to the same COMMON block, and will all be consistent */

				comno = np->vardesc.varno;
				comoffset = np->voffset + offset;
				break;

			default:
				dclerr("bad storage class in equivalence", np);
				np = NULL;
				break;
			}

			if(np)
			{
				q->eqvoffset = offset;

/* eqvbottom   gets the largest difference between the array base address
   and the address specified in the EQUIV declaration */

				equivdecl->eqvbottom =
				    lmin(equivdecl->eqvbottom, -offset);

/* eqvtop   gets the largest difference between the end of the array and
   the address given in the EQUIVALENCE */

				equivdecl->eqvtop =
				    lmax(equivdecl->eqvtop, leng-offset);
			}
			q->eqvitem.eqvname = np;
		}

/* Now all equivalenced variables are in the hash table with the proper
   offset, and   eqvtop and eqvbottom   are set. */

		if(comno >= 0)

/* Get rid of all STGEQUIVS, they will be mapped onto STGCOMMON variables
   */

			eqvcommon(equivdecl, comno, comoffset);
		else for(q = equivdecl->equivs ; q ; q = q->eqvnextp)
		{
			if(np = q->eqvitem.eqvname)
			{
				inequiv = NO;
				if(np->vstg==STGEQUIV)
					if( (ovarno = np->vardesc.varno) == i)
					{

/* Can't EQUIV different elements of the same array */

						if(np->voffset + q->eqvoffset != 0)
							dclerr
			("inconsistent equivalence", np);
					}
					else	{
						offset = np->voffset;
						inequiv = YES;
					}

				np->vstg = STGEQUIV;
				np->vardesc.varno = i;
				np->voffset = - q->eqvoffset;

				if(inequiv)

/* Combine 2 equivalence declarations */

					eqveqv(i, ovarno, q->eqvoffset + offset);
			}
		}
	}

/* Now each equivalence declaration is distinct (all connections have been
   merged in eqveqv()), and some may be empty. */

	for(i = 0 ; i < nequiv ; ++i)
	{
		equivdecl = & eqvclass[i];
		if(equivdecl->eqvbottom!=0 || equivdecl->eqvtop!=0) {

/* a live chain */

			k = TYCHAR;
			pref = 1;
			for(q = equivdecl->equivs ; q; q = q->eqvnextp)
			    if ((np = q->eqvitem.eqvname)
			    		&& !np->veqvadjust) {
				np->veqvadjust = 1;
				np->voffset -= equivdecl->eqvbottom;
				t = typealign[k1 = np->vtype];
				if (pref < type_pref[k1]) {
					k = k1;
					pref = type_pref[k1];
					}
				if(np->voffset % t != 0) {
					dclerr("bad alignment forced by equivalence", np);
					--nerr; /* don't give bad return code for this */
					}
				}
			equivdecl->eqvtype = k;
		}
		freqchain(equivdecl);
	}
}
Esempio n. 29
0
         offset(foreground_pixel), XrmRString, "Black"},
    {XtNknobWidth, XtCWidth, XrmRInt, sizeof(int),
         offset(knob_width), XrmRString, "9"},
    {XtNknobHeight, XtCHeight, XrmRInt, sizeof(int),
	 offset(knob_height), XrmRString, "9"},
    {XtNknobIndent, XtCKnobIndent, XrmRInt, sizeof(int),
	 offset(knob_indent), XrmRString, "16"},
    {XtNknobPixel, XtCKnobPixel, XrmRPixel, sizeof(int),
	 offset(knob_pixel), XrmRString, "Black"},
    {XtNrefigureMode, XtCRefigureMode, XrmRBoolean, sizeof(int),
         offset(refiguremode), XrmRString, "On"}
};

static XtResource subresources[] = {
    {XtNposition, XtCPosition, XrmRInt, sizeof(int),
         suboffset(position), XrmRString, "0"},
    {XtNmin, XtCMin, XrmRInt, sizeof(int),
         suboffset(min), XrmRString, "0"},
    {XtNmax, XtCMax, XrmRInt, sizeof(int),
         suboffset(max), XrmRString, "1000"},
    {XtNautoChange, XtCAutoChange, XrmRBoolean, sizeof(int),
         suboffset(autochange), XrmRString, "On"}
};

static void Initialize();
static void Realize();
static void Destroy();
static void Resize();
static void SetValues();
static XtGeometryResult GeometryManager();
static void ChangeManaged();