Exemplo n.º 1
0
void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
   struct brw_reg primmask;
   int jmp;
   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));

   c->nr_verts = 3;
   alloc_regs(c);

   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);

   brw_MOV(p, primmask, brw_imm_ud(1));
   brw_SHL(p, primmask, primmask, payload_prim);

   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
					       (1<<_3DPRIM_TRISTRIP) |
					       (1<<_3DPRIM_TRIFAN) |
					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
					       (1<<_3DPRIM_POLYGON) |
					       (1<<_3DPRIM_RECTLIST) |
					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
   brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_tri_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
					       (1<<_3DPRIM_LINESTRIP) |
					       (1<<_3DPRIM_LINELOOP) |
					       (1<<_3DPRIM_LINESTRIP_CONT) |
					       (1<<_3DPRIM_LINESTRIP_BF) |
					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
   brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_line_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
   brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_point_sprite_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   brw_emit_point_setup( c, false );
}
Exemplo n.º 2
0
void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
{
   struct brw_codegen *p = &c->func;

   /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
    */
   brw_ADD(p,
	   c->reg.loopcount,
	   c->reg.nr_verts,
	   brw_imm_d(-2));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_G);

   brw_IF(p, BRW_EXECUTE_1);
   {
      struct brw_indirect v0 = brw_indirect(0, 0);
      struct brw_indirect vptr = brw_indirect(1, 0);

      brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));

      brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE,
                        ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)
                         | URB_WRITE_PRIM_START));

      brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));

      brw_DO(p, BRW_EXECUTE_1);
      {
	 brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE,
                           (_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT));

	 brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
	 brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));

	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
      }
      brw_WHILE(p);
      brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

      brw_clip_emit_vue(c, v0, BRW_URB_WRITE_EOT_COMPLETE,
                        ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)
                         | URB_WRITE_PRIM_END));
   }
   brw_ENDIF(p);
}
Exemplo n.º 3
0
static void merge_edgeflags( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   const struct brw_context *brw = p->brw;
   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);

   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
   brw_CMP(p,
	   vec1(brw_null_reg()),
	   BRW_CONDITIONAL_EQ,
	   tmp0,
	   brw_imm_ud(_3DPRIM_POLYGON));

   /* Get away with using reg.vertex because we know that this is not
    * a _3DPRIM_TRISTRIP_REVERSE:
    */
   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
      brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_EQ);
      brw_MOV(p, byte_offset(c->reg.vertex[0],
                             brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_EDGE)),
              brw_imm_f(0));
      brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL);

      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
      brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_EQ);
      brw_MOV(p, byte_offset(c->reg.vertex[2],
                             brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_EDGE)),
              brw_imm_f(0));
      brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL);
   }
   brw_ENDIF(p);
}
Exemplo n.º 4
0
void
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
        struct brw_reg dst,
        struct brw_reg index,
        struct brw_reg offset)
{
    assert(brw->gen <= 7);
    assert(index.file == BRW_IMMEDIATE_VALUE &&
           index.type == BRW_REGISTER_TYPE_UD);
    uint32_t surf_index = index.dw1.ud;

    struct brw_reg header = brw_vec8_grf(0, 0);

    gen6_resolve_implied_move(p, &header, inst->base_mrf);

    brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
            offset);

    uint32_t msg_type;

    if (brw->gen >= 6)
        msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
    else if (brw->gen == 5 || brw->is_g4x)
        msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
    else
        msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

    /* Each of the 8 channel enables is considered for whether each
     * dword is written.
     */
    brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
    brw_set_dest(p, send, dst);
    brw_set_src0(p, send, header);
    if (brw->gen < 6)
        brw_inst_set_cond_modifier(brw, send, inst->base_mrf);
    brw_set_dp_read_message(p, send,
                            surf_index,
                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                            msg_type,
                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                            2, /* mlen */
                            true, /* header_present */
                            1 /* rlen */);

    brw_mark_surface_used(&prog_data->base, surf_index);
}
Exemplo n.º 5
0
static void emit_points(struct brw_clip_compile *c,
			bool do_offset )
{
   struct brw_compile *p = &c->func;
   const struct brw_context *brw = p->brw;

   struct brw_indirect v0 = brw_indirect(0, 0);
   struct brw_indirect v0ptr = brw_indirect(2, 0);

   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));

   brw_DO(p, BRW_EXECUTE_1);
   {
      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));

      /* draw if edgeflag != 0
       */
      brw_CMP(p,
	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
	      deref_1f(v0, brw_varying_to_offset(&c->vue_map,
                                                 VARYING_SLOT_EDGE)),
	      brw_imm_f(0));
      brw_IF(p, BRW_EXECUTE_1);
      {
	 if (do_offset)
	    apply_one_offset(c, v0);

	 brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE,
                           (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT)
                           | URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      }
      brw_ENDIF(p);

      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
      brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ);
   }
   brw_WHILE(p);
   brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL);
}
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    struct brw_codegen *p = &c->func;

    if (p->devinfo->gen == 5) {
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
        brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p,
			c->reg.R0,
			0,
			c->reg.R0,
			1, /* allocate */
			1, /* response length */
			0 /* eot */);
        }
        brw_ENDIF(p);
        brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
Exemplo n.º 7
0
void brw_emit_tri_clip( struct brw_clip_compile *c )
{
   struct brw_codegen *p = &c->func;
   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
   brw_clip_tri_init_vertices(c);
   brw_clip_init_clipmask(c);
   brw_clip_init_ff_sync(c);

   /* if -ve rhw workaround bit is set,
      do cliptest */
   if (p->devinfo->has_negative_rhw_bug) {
      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
              brw_imm_ud(1<<20));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
      brw_IF(p, BRW_EXECUTE_1);
      {
         brw_clip_test(c);
      }
      brw_ENDIF(p);
   }
   /* Can't push into do_clip_tri because with polygon (or quad)
    * flatshading, need to apply the flatshade here because we don't
    * respect the PV when converting to trifan for emit:
    */
   if (c->has_flat_shading)
      brw_clip_tri_flat_shade(c);

   if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
       (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
      do_clip_tri(c);
   else
      maybe_do_clip_tri(c);

   brw_clip_tri_emit_polygon(c);

   /* Send an empty message to kill the thread:
    */
   brw_clip_kill_thread(c);
}
Exemplo n.º 8
0
void
vec4_generator::generate_scratch_read(vec4_instruction *inst,
                                      struct brw_reg dst,
                                      struct brw_reg index)
{
   struct brw_reg header = brw_vec8_grf(0, 0);

   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
				     index);

   uint32_t msg_type;

   if (brw->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (brw->gen == 5 || brw->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (brw->gen < 6)
      brw_inst_set_cond_modifier(brw, send, inst->base_mrf);
   brw_set_dp_read_message(p, send,
			   255, /* binding table index: stateless access */
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			   2, /* mlen */
                           true, /* header_present */
			   1 /* rlen */);
}
Exemplo n.º 9
0
/**
 * Loads the clip distance for a vertex into `dst`, and ends with
 * a comparison of it to zero with the condition `cond`.
 *
 * - If using a fixed plane, the distance is dot(hpos, plane).
 * - If using a user clip plane, the distance is directly available in the vertex.
 */
static inline void
load_clip_distance(struct brw_clip_compile *c, struct brw_indirect vtx,
                struct brw_reg dst, GLuint hpos_offset, int cond)
{
   struct brw_codegen *p = &c->func;

   dst = vec4(dst);
   brw_AND(p, vec1(brw_null_reg()), c->reg.vertex_src_mask, brw_imm_ud(1));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
   brw_IF(p, BRW_EXECUTE_1);
   {
      struct brw_indirect temp_ptr = brw_indirect(7, 0);
      brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx), c->reg.clipdistance_offset);
      brw_MOV(p, vec1(dst), deref_1f(temp_ptr, 0));
   }
   brw_ELSE(p);
   {
      brw_MOV(p, dst, deref_4f(vtx, hpos_offset));
      brw_DP4(p, dst, dst, c->reg.plane_equation);
   }
   brw_ENDIF(p);

   brw_CMP(p, brw_null_reg(), cond, vec1(dst), brw_imm_f(0.0f));
}
Exemplo n.º 10
0
/***********************************************************************
 * Output clipped polygon as an unfilled primitive:
 */
static void emit_lines(struct brw_clip_compile *c,
		       bool do_offset)
{
   struct brw_compile *p = &c->func;
   const struct brw_context *brw = p->brw;
   struct brw_indirect v0 = brw_indirect(0, 0);
   struct brw_indirect v1 = brw_indirect(1, 0);
   struct brw_indirect v0ptr = brw_indirect(2, 0);
   struct brw_indirect v1ptr = brw_indirect(3, 0);

   /* Need a seperate loop for offset:
    */
   if (do_offset) {
      brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
      brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));

      brw_DO(p, BRW_EXECUTE_1);
      {
	 brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
	 brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
	
	 apply_one_offset(c, v0);
	
	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
         brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_G);
      }
      brw_WHILE(p);
      brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL);
   }

   /* v1ptr = &inlist[nr_verts]
    * *v1ptr = v0
    */
   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
   brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));

   brw_DO(p, BRW_EXECUTE_1);
   {
      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
      brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));

      /* draw edge if edgeflag != 0 */
      brw_CMP(p,
	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
	      deref_1f(v0, brw_varying_to_offset(&c->vue_map,
                                                 VARYING_SLOT_EDGE)),
	      brw_imm_f(0));
      brw_IF(p, BRW_EXECUTE_1);
      {
	 brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE,
                           (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
                           | URB_WRITE_PRIM_START);
	 brw_clip_emit_vue(c, v1, BRW_URB_WRITE_ALLOCATE_COMPLETE,
                           (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
                           | URB_WRITE_PRIM_END);
      }
      brw_ENDIF(p);

      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
      brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ);
   }
   brw_WHILE(p);
   brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL);
}
Exemplo n.º 11
0
/* Line clipping, more or less following the following algorithm:
 *
 *  for (p=0;p<MAX_PLANES;p++) {
 *     if (clipmask & (1 << p)) {
 *        GLfloat dp0 = DOTPROD( vtx0, plane[p] );
 *        GLfloat dp1 = DOTPROD( vtx1, plane[p] );
 *
 *        if (dp1 < 0.0f) {
 *           GLfloat t = dp1 / (dp1 - dp0);
 *           if (t > t1) t1 = t;
 *        } else {
 *           GLfloat t = dp0 / (dp0 - dp1);
 *           if (t > t0) t0 = t;
 *        }
 *
 *        if (t0 + t1 >= 1.0)
 *           return;
 *     }
 *  }
 *
 *  interp( ctx, newvtx0, vtx0, vtx1, t0 );
 *  interp( ctx, newvtx1, vtx1, vtx0, t1 );
 *
 */
static void clip_and_emit_line( struct brw_clip_compile *c )
{
   struct brw_codegen *p = &c->func;
   struct brw_indirect vtx0     = brw_indirect(0, 0);
   struct brw_indirect vtx1      = brw_indirect(1, 0);
   struct brw_indirect newvtx0   = brw_indirect(2, 0);
   struct brw_indirect newvtx1   = brw_indirect(3, 0);
   struct brw_indirect plane_ptr = brw_indirect(4, 0);
   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
   GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
   GLint clipdist0_offset = c->key.nr_userclip
      ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0)
      : 0;

   brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
   brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1]));
   brw_MOV(p, get_addr_reg(newvtx0),   brw_address(c->reg.vertex[2]));
   brw_MOV(p, get_addr_reg(newvtx1),   brw_address(c->reg.vertex[3]));
   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));

   /* Note: init t0, t1 together:
    */
   brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));

   brw_clip_init_planes(c);
   brw_clip_init_clipmask(c);

   /* -ve rhw workaround */
   if (p->devinfo->has_negative_rhw_bug) {
      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
              brw_imm_ud(1<<20));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
      brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
      brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
   }

   /* Set the initial vertex source mask: The first 6 planes are the bounds
    * of the view volume; the next 8 planes are the user clipping planes.
    */
   brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0));

   /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0].
    * We'll increment 6 times before we start hitting actual user clipping. */
   brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float)));

   brw_DO(p, BRW_EXECUTE_1);
   {
      /* if (planemask & 1)
       */
      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

      brw_IF(p, BRW_EXECUTE_1);
      {
         brw_AND(p, v1_null_ud, c->reg.vertex_src_mask, brw_imm_ud(1));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
         brw_IF(p, BRW_EXECUTE_1);
         {
            /* user clip distance: just fetch the correct float from each vertex */
            struct brw_indirect temp_ptr = brw_indirect(7, 0);
            brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx0), c->reg.clipdistance_offset);
            brw_MOV(p, c->reg.dp0, deref_1f(temp_ptr, 0));
            brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx1), c->reg.clipdistance_offset);
            brw_MOV(p, c->reg.dp1, deref_1f(temp_ptr, 0));
         }
         brw_ELSE(p);
         {
            /* fixed plane: fetch the hpos, dp4 against the plane. */
            if (c->key.nr_userclip)
               brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
            else
               brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));

            brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, hpos_offset), c->reg.plane_equation);
            brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, hpos_offset), c->reg.plane_equation);
         }
         brw_ENDIF(p);

         brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, vec1(c->reg.dp1), brw_imm_f(0.0f));

         brw_IF(p, BRW_EXECUTE_1);
         {
             /*
              * Both can be negative on GM965/G965 due to RHW workaround
              * if so, this object should be rejected.
              */
             if (p->devinfo->has_negative_rhw_bug) {
                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
                 brw_IF(p, BRW_EXECUTE_1);
                 {
                     brw_clip_kill_thread(c);
                 }
                 brw_ENDIF(p);
             }

             brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
             brw_math_invert(p, c->reg.t, c->reg.t);
             brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);

             brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
             brw_MOV(p, c->reg.t1, c->reg.t);
             brw_inst_set_pred_control(p->devinfo, brw_last_inst,
                                       BRW_PREDICATE_NORMAL);
	 }
	 brw_ELSE(p);
	 {
             /* Coming back in.  We know that both cannot be negative
              * because the line would have been culled in that case.
              */

             /* If both are positive, do nothing */
             /* Only on GM965/G965 */
             if (p->devinfo->has_negative_rhw_bug) {
                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
                 brw_IF(p, BRW_EXECUTE_1);
             }

             {
                 brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
                 brw_math_invert(p, c->reg.t, c->reg.t);
                 brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);

                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
                 brw_MOV(p, c->reg.t0, c->reg.t);
                 brw_inst_set_pred_control(p->devinfo, brw_last_inst,
                                           BRW_PREDICATE_NORMAL);
             }

             if (p->devinfo->has_negative_rhw_bug) {
                 brw_ENDIF(p);
             }
         }
	 brw_ENDIF(p);
      }
      brw_ENDIF(p);

      /* plane_ptr++;
       */
      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));

      /* while (planemask>>=1) != 0
       */
      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
      brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1));
      brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
      brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float)));
      brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
   }
   brw_WHILE(p);
   brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, false);
      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, false);

      brw_clip_emit_vue(c, newvtx0, BRW_URB_WRITE_ALLOCATE_COMPLETE,
                        (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
                        | URB_WRITE_PRIM_START);
      brw_clip_emit_vue(c, newvtx1, BRW_URB_WRITE_EOT_COMPLETE,
                        (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
                        | URB_WRITE_PRIM_END);
   }
   brw_ENDIF(p);
   brw_clip_kill_thread(c);
}
Exemplo n.º 12
0
void
vec4_generator::generate_scratch_write(vec4_instruction *inst,
                                       struct brw_reg dst,
                                       struct brw_reg src,
                                       struct brw_reg index)
{
   struct brw_reg header = brw_vec8_grf(0, 0);
   bool write_commit;

   /* If the instruction is predicated, we'll predicate the send, not
    * the header setup.
    */
   brw_set_default_predicate_control(p, false);

   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
				     index);

   brw_MOV(p,
	   retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
	   retype(src, BRW_REGISTER_TYPE_D));

   uint32_t msg_type;

   if (brw->gen >= 7)
      msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else if (brw->gen == 6)
      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else
      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;

   brw_set_default_predicate_control(p, inst->predicate);

   /* Pre-gen6, we have to specify write commits to ensure ordering
    * between reads and writes within a thread.  Afterwards, that's
    * guaranteed and write commits only matter for inter-thread
    * synchronization.
    */
   if (brw->gen >= 6) {
      write_commit = false;
   } else {
      /* The visitor set up our destination register to be g0.  This
       * means that when the next read comes along, we will end up
       * reading from g0 and causing a block on the write commit.  For
       * write-after-read, we are relying on the value of the previous
       * read being used (and thus blocking on completion) before our
       * write is executed.  This means we have to be careful in
       * instruction scheduling to not violate this assumption.
       */
      write_commit = true;
   }

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (brw->gen < 6)
      brw_inst_set_cond_modifier(brw, send, inst->base_mrf);
   brw_set_dp_write_message(p, send,
			    255, /* binding table index: stateless access */
			    BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			    msg_type,
			    3, /* mlen */
			    true, /* header present */
			    false, /* not a render target write */
			    write_commit, /* rlen */
			    false, /* eot */
			    write_commit);
}
Exemplo n.º 13
0
void
vec4_generator::generate_code(const cfg_t *cfg)
{
   struct annotation_info annotation;
   memset(&annotation, 0, sizeof(annotation));

   foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
      struct brw_reg src[3], dst;

      if (unlikely(debug_flag))
         annotate(brw, &annotation, cfg, inst, p->next_insn_offset);

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = inst->get_src(this->prog_data, i);
      }
      dst = inst->get_dst();

      brw_set_default_predicate_control(p, inst->predicate);
      brw_set_default_predicate_inverse(p, inst->predicate_inverse);
      brw_set_default_saturate(p, inst->saturate);
      brw_set_default_mask_control(p, inst->force_writemask_all);
      brw_set_default_acc_write_control(p, inst->writes_accumulator);

      unsigned pre_emit_nr_insn = p->nr_insn;

      generate_vec4_instruction(inst, dst, src);

      if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
         assert(p->nr_insn == pre_emit_nr_insn + 1 ||
                !"conditional_mod, no_dd_check, or no_dd_clear set for IR "
                 "emitting more than 1 instruction");

         brw_inst *last = &p->store[pre_emit_nr_insn];

         brw_inst_set_cond_modifier(brw, last, inst->conditional_mod);
         brw_inst_set_no_dd_clear(brw, last, inst->no_dd_clear);
         brw_inst_set_no_dd_check(brw, last, inst->no_dd_check);
      }
   }

   brw_set_uip_jip(p);
   annotation_finalize(&annotation, p->next_insn_offset);

   int before_size = p->next_insn_offset;
   brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann);
   int after_size = p->next_insn_offset;

   if (unlikely(debug_flag)) {
      if (shader_prog) {
         fprintf(stderr, "Native code for %s vertex shader %d:\n",
                 shader_prog->Label ? shader_prog->Label : "unnamed",
                 shader_prog->Name);
      } else {
         fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
      }
      fprintf(stderr, "vec4 shader: %d instructions. Compacted %d to %d"
                      " bytes (%.0f%%)\n",
              before_size / 16, before_size, after_size,
              100.0f * (before_size - after_size) / before_size);

      dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog);
      ralloc_free(annotation.ann);
   }
}
Exemplo n.º 14
0
static void brw_clip_test( struct brw_clip_compile *c )
{
    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);

    struct brw_reg v0 = get_tmp(c);
    struct brw_reg v1 = get_tmp(c);
    struct brw_reg v2 = get_tmp(c);

    struct brw_indirect vt0 = brw_indirect(0, 0);
    struct brw_indirect vt1 = brw_indirect(1, 0);
    struct brw_indirect vt2 = brw_indirect(2, 0);

    struct brw_codegen *p = &c->func;
    struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */

    GLuint hpos_offset = brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_POS);

    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
    brw_MOV(p, v0, deref_4f(vt0, hpos_offset));
    brw_MOV(p, v1, deref_4f(vt1, hpos_offset));
    brw_MOV(p, v2, deref_4f(vt2, hpos_offset));
    brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));

    /* test nearz, xmin, ymin plane */
    /* clip.xyz < -clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
    brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p);
    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside,need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

    /* test farz, xmax, ymax plane */
    /* clip.xyz > clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
    brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
    brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
    brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p);
    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside,need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
    brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

    release_tmps(c);
}
Exemplo n.º 15
0
/* Use mesa's clipping algorithms, translated to GEN4 assembly.
 */
void brw_clip_tri( struct brw_clip_compile *c )
{
   struct brw_codegen *p = &c->func;
   struct brw_indirect vtx = brw_indirect(0, 0);
   struct brw_indirect vtxPrev = brw_indirect(1, 0);
   struct brw_indirect vtxOut = brw_indirect(2, 0);
   struct brw_indirect plane_ptr = brw_indirect(3, 0);
   struct brw_indirect inlist_ptr = brw_indirect(4, 0);
   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
   GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
   GLint clipdist0_offset = c->key.nr_userclip
      ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0)
      : 0;

   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );
   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist));
   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));

   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );

   /* Set the initial vertex source mask: The first 6 planes are the bounds
    * of the view volume; the next 8 planes are the user clipping planes.
    */
   brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0));

   /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0].
    * We'll increment 6 times before we start hitting actual user clipping. */
   brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float)));

   brw_DO(p, BRW_EXECUTE_1);
   {
      /* if (planemask & 1)
       */
      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

      brw_IF(p, BRW_EXECUTE_1);
      {
	 /* vtxOut = freelist_ptr++
	  */
	 brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) );
	 brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));

	 if (c->key.nr_userclip)
	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
	 else
	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));

	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));

	 brw_DO(p, BRW_EXECUTE_1);
	 {
	    /* vtx = *input_ptr;
	     */
	    brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));

            load_clip_distance(c, vtxPrev, c->reg.dpPrev, hpos_offset, BRW_CONDITIONAL_L);
	    /* (prev < 0.0f) */
	    brw_IF(p, BRW_EXECUTE_1);
	    {
               load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_GE);
	       /* IS_POSITIVE(next)
		*/
	       brw_IF(p, BRW_EXECUTE_1);
	       {

		  /* Coming back in.
		   */
		  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
		  brw_math_invert(p, c->reg.t, c->reg.t);
		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);

		  /* If (vtxOut == 0) vtxOut = vtxPrev
		   */
		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
                  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev));
                  brw_inst_set_pred_control(p->devinfo, brw_last_inst,
                                            BRW_PREDICATE_NORMAL);

		  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, false);

		  /* *outlist_ptr++ = vtxOut;
		   * nr_verts++;
		   * vtxOut = 0;
		   */
		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
	       }
	       brw_ENDIF(p);

	    }
	    brw_ELSE(p);
	    {
	       /* *outlist_ptr++ = vtxPrev;
		* nr_verts++;
		*/
	       brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
	       brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
	       brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));

               load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_L);
	       /* (next < 0.0f)
		*/
	       brw_IF(p, BRW_EXECUTE_1);
	       {
		  /* Going out of bounds.  Avoid division by zero as we
		   * know dp != dpPrev from DIFFERENT_SIGNS, above.
		   */
		  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
		  brw_math_invert(p, c->reg.t, c->reg.t);
		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);

		  /* If (vtxOut == 0) vtxOut = vtx
		   */
		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
                  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx));
                  brw_inst_set_pred_control(p->devinfo, brw_last_inst,
                                            BRW_PREDICATE_NORMAL);

		  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, true);

		  /* *outlist_ptr++ = vtxOut;
		   * nr_verts++;
		   * vtxOut = 0;
		   */
		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
	       }
	       brw_ENDIF(p);
	    }
	    brw_ENDIF(p);

	    /* vtxPrev = vtx;
	     * inlist_ptr++;
	     */
	    brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
	    brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));

	    /* while (--loopcount != 0)
	     */
	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
            brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
	 }
	 brw_WHILE(p);
         brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
	  * inlist = outlist
	  * inlist_ptr = &inlist[0]
	  * outlist_ptr = &outlist[0]
	  */
	 brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
	 brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
	 brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
      }
      brw_ENDIF(p);

      /* plane_ptr++;
       */
      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));

      /* nr_verts >= 3
       */
      brw_CMP(p,
	      vec1(brw_null_reg()),
	      BRW_CONDITIONAL_GE,
	      c->reg.nr_verts,
	      brw_imm_ud(3));
      brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);

      /* && (planemask>>=1) != 0
       */
      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
      brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1));
      brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float)));
   }
   brw_WHILE(p);
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
Exemplo n.º 16
0
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 */
void
gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_codegen *p = &c->func;
   brw_inst *inst;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_ff_gs_alloc_regs(c, num_verts, true);
   brw_ff_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         inst = brw_MOV(p, destination_indices_uw,
                        brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                                : 0x00020001)); /* (1, 0, 2) */
         brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL);
      }

      assert(c->reg.destination_indices.width == BRW_EXECUTE_4);
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_4);
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
      brw_pop_insn_state(p);
      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            brw_set_default_access_mode(p, BRW_ALIGN_16);
            brw_push_insn_state(p);
            brw_set_default_exec_size(p, BRW_EXECUTE_4);

            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_pop_insn_state(p);

            brw_set_default_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_ff_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple “mov” instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_ff_gs_ff_sync(c, 1);

   brw_ff_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_ff_gs_offset_header_dw2(c,
                                  URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_ff_gs_offset_header_dw2(c,
                                  URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
         brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_ff_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}