Example 1
static void brw_clip_test( struct brw_clip_compile *c )
{
    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);

    struct brw_reg v0 = get_tmp(c);
    struct brw_reg v1 = get_tmp(c);
    struct brw_reg v2 = get_tmp(c);

    struct brw_indirect vt0 = brw_indirect(0, 0);
    struct brw_indirect vt1 = brw_indirect(1, 0);
    struct brw_indirect vt2 = brw_indirect(2, 0);

    struct brw_compile *p = &c->func;
    struct brw_instruction *is_outside;
    struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */

    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
    brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
    brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
    brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
    brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
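    /* The AND above keeps only the user-clip bits (6 and up); the ORs below
     * rebuild the six frustum bits.  Inferred from the shift counts used
     * below: bit 5 = xmin, bit 4 = xmax, bit 3 = ymin, bit 2 = ymax,
     * bit 1 = nearz, bit 0 = farz.
     */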

    /* test nearz, xmin, ymin plane */
    /* clip.xyz < -clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    is_outside = brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p, is_outside);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside; need to clip */
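    /* t1 ^ t2 (and t2 ^ t3) is nonzero exactly where two vertices sit on
     * opposite sides of a plane, so the OR of the two XORs flags each plane
     * crossed by a straddling edge; the AND keeps just the compare bit.
     */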
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* test farz, xmax, ymax plane */
    /* clip.xyz > clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    is_outside = brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p, is_outside);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside; need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    release_tmps(c);
}
Example 2
void
gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
                                               struct brw_reg dst,
                                               struct brw_reg *src)
{
   vec4_instruction *ir = (vec4_instruction *) instruction;

   if (dst.width == BRW_WIDTH_4) {
      /* This happens in attribute fixups for "dual instanced" geometry
       * shaders, since they use attributes that are vec4's.  Since the exec
       * width is only 4, it's essential that the caller set
       * force_writemask_all in order to make sure the instruction is executed
       * regardless of which channels are enabled.
       */
      assert(ir->force_writemask_all);

      /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
       * the following register region restrictions (from Graphics BSpec:
       * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
       * > Register Region Restrictions)
       *
       *     1. ExecSize must be greater than or equal to Width.
       *
       *     2. If ExecSize = Width and HorzStride != 0, VertStride must be set
       *        to Width * HorzStride.
       */
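      /* For example, a source such as g10<8;8,1>:F has Width 8 > ExecSize 4,
       * violating rule 1; rewritten as g10<4;4,1>:F it satisfies both rules,
       * since ExecSize = Width = 4 and VertStride = Width * HorzStride = 4.
       * (The register number here is illustrative only.)
       */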
      for (int i = 0; i < 3; i++) {
         if (src[i].file == BRW_GENERAL_REGISTER_FILE)
            src[i] = stride(src[i], 4, 4, 1);
      }
   }

   switch (ir->opcode) {
   case BRW_OPCODE_MOV:
      MOV(dst, src[0]);
      break;

   case BRW_OPCODE_ADD:
      ADD(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MUL:
      MUL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MACH:
      MACH(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MAD:
      MAD(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_FRC:
      FRC(dst, src[0]);
      break;

   case BRW_OPCODE_RNDD:
      RNDD(dst, src[0]);
      break;

   case BRW_OPCODE_RNDE:
      RNDE(dst, src[0]);
      break;

   case BRW_OPCODE_RNDZ:
      RNDZ(dst, src[0]);
      break;

   case BRW_OPCODE_AND:
      AND(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_OR:
      OR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_XOR:
      XOR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_NOT:
      NOT(dst, src[0]);
      break;

   case BRW_OPCODE_ASR:
      ASR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SHR:
      SHR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SHL:
      SHL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_CMP:
      CMP(dst, ir->conditional_mod, src[0], src[1]);
      break;

   case BRW_OPCODE_SEL:
      SEL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DPH:
      DPH(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP4:
      DP4(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP3:
      DP3(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP2:
      DP2(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_F32TO16:
      /* Emulate the Gen7 zeroing bug. */
      MOV(retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
      MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]);
      break;

   case BRW_OPCODE_F16TO32:
      MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF));
      break;

   case BRW_OPCODE_LRP:
      LRP(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFREV:
      /* BFREV only supports UD type for src and dst. */
      BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
            retype(src[0], BRW_REGISTER_TYPE_UD));
      break;

   case BRW_OPCODE_FBH:
      /* FBH only supports UD type for dst. */
      FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_FBL:
      /* FBL only supports UD type for dst. */
      FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_CBIT:
      /* CBIT only supports UD type for dst. */
      CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_ADDC:
      ADDC(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SUBB:
      SUBB(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFE:
      BFE(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFI1:
      BFI1(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFI2:
      BFI2(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_IF:
      IF(ir->predicate);
      break;

   case BRW_OPCODE_ELSE:
      ELSE();
      break;

   case BRW_OPCODE_ENDIF:
      ENDIF();
      break;

   case BRW_OPCODE_DO:
      DO();
      break;

   case BRW_OPCODE_BREAK:
      BREAK();
      break;

   case BRW_OPCODE_CONTINUE:
      CONTINUE();
      break;

   case BRW_OPCODE_WHILE:
      WHILE();
      break;

   case SHADER_OPCODE_RCP:
      MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
      break;

   case SHADER_OPCODE_RSQ:
      MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
      break;

   case SHADER_OPCODE_SQRT:
      MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
      break;

   case SHADER_OPCODE_EXP2:
      MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
      break;

   case SHADER_OPCODE_LOG2:
      MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
      break;

   case SHADER_OPCODE_SIN:
      MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
      break;

   case SHADER_OPCODE_COS:
      MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
      break;

   case SHADER_OPCODE_POW:
      MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_INT_QUOTIENT:
      MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_INT_REMAINDER:
      MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
   case SHADER_OPCODE_TG4:
   case SHADER_OPCODE_TG4_OFFSET:
      generate_tex(ir, dst);
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(ir, true);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      generate_scratch_read(ir, dst, src[0]);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      generate_scratch_write(ir, dst, src[0], src[1]);
      break;

   case VS_OPCODE_PULL_CONSTANT_LOAD:
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      generate_pull_constant_load(ir, dst, src[0], src[1]);
      break;

   case GS_OPCODE_URB_WRITE:
      generate_urb_write(ir, false);
      break;

   case GS_OPCODE_THREAD_END:
      generate_gs_thread_end(ir);
      break;

   case GS_OPCODE_SET_WRITE_OFFSET:
      generate_gs_set_write_offset(dst, src[0], src[1]);
      break;

   case GS_OPCODE_SET_VERTEX_COUNT:
      generate_gs_set_vertex_count(dst, src[0]);
      break;

   case GS_OPCODE_SET_DWORD_2_IMMED:
      generate_gs_set_dword_2_immed(dst, src[0]);
      break;

   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      generate_gs_prepare_channel_masks(dst);
      break;

   case GS_OPCODE_SET_CHANNEL_MASKS:
      generate_gs_set_channel_masks(dst, src[0]);
      break;

   case SHADER_OPCODE_SHADER_TIME_ADD:
      assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
      break;

   case SHADER_OPCODE_UNTYPED_ATOMIC:
      assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_ATOMIC");
      break;

   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_SURFACE_READ");
      break;

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
      break;

   default:
      if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
         _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
                       opcode_descs[ir->opcode].name);
      } else {
         _mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
      }
      abort();
   }
}
Example 3
/* Post-fragment-program processing.  Send the results to the
 * framebuffer.
 */
static void emit_fb_write( struct brw_wm_compile *c,
			   struct brw_reg *arg0,
			   struct brw_reg *arg1,
			   struct brw_reg *arg2,
			   GLuint target,
			   GLuint eot)
{
   struct brw_compile *p = &c->func;
   GLuint nr = 2;
   GLuint channel;

   /* Reserve a space for AA - may not be needed:
    */
   if (c->key.aa_dest_stencil_reg)
      nr += 1;

   /* I don't really understand how this achieves the color interleave
    * (i.e. RGBARGBA) in the result:  [Do the saturation here]
    */
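   /* From the MOVs below, the payload layout is actually planar:
    * m[nr+ch] carries channel ch for pixels 0-7 and m[nr+ch+4] the same
    * channel for pixels 8-15, so any RGBA interleaving is presumably done
    * by the render target write hardware when it consumes the message
    * (an inference from this code, not from documentation).
    */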
   {
      brw_push_insn_state(p);
      
      for (channel = 0; channel < 4; channel++) {
	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_MOV(p,
		 brw_message_reg(nr + channel),
		 arg0[channel]);
       
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_MOV(p,
		 brw_message_reg(nr + channel + 4),
		 sechalf(arg0[channel]));
      }

      /* skip over the regs populated above:
       */
      nr += 8;
   
      brw_pop_insn_state(p);
   }

   if (c->key.source_depth_to_render_target)
   {
      if (c->key.computes_depth) 
	 brw_MOV(p, brw_message_reg(nr), arg2[2]);
      else 
	 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */

      nr += 2;
   }

   if (c->key.dest_depth_reg)
   {
      GLuint comp = c->key.dest_depth_reg / 2;
      GLuint off = c->key.dest_depth_reg % 2;

      if (off != 0) {
         brw_push_insn_state(p);
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
         /* 2nd half? */
         brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
         brw_pop_insn_state(p);
      }
      else {
         brw_MOV(p, brw_message_reg(nr), arg1[comp]);
      }
      nr += 2;
   }


   if (!c->key.runtime_check_aads_emit) {
      if (c->key.aa_dest_stencil_reg)
	 emit_aa(c, arg1, 2);

      fire_fb_write(c, 0, nr, target, eot);
   }
   else {
      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      struct brw_reg ip = brw_ip_reg();
      struct brw_instruction *jmp;
      
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
      brw_AND(p, 
	      v1_null_ud, 
	      get_element_ud(brw_vec8_grf(1,0), 6), 
	      brw_imm_ud(1<<26)); 

      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
      {
	 emit_aa(c, arg1, 2);
	 fire_fb_write(c, 0, nr, target, eot);
	 /* note - thread killed in subroutine */
      }
      brw_land_fwd_jump(p, jmp);

      /* ELSE: Shuffle up one register to fill in the hole left for AA:
       */
      fire_fb_write(c, 1, nr-1, target, eot);
   }
}
Example 4
/* Use mesa's clipping algorithms, translated to GEN4 assembly.
 */
void brw_clip_tri( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_indirect vtx = brw_indirect(0, 0);
   struct brw_indirect vtxPrev = brw_indirect(1, 0);
   struct brw_indirect vtxOut = brw_indirect(2, 0);
   struct brw_indirect plane_ptr = brw_indirect(3, 0);
   struct brw_indirect inlist_ptr = brw_indirect(4, 0);
   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
   struct brw_instruction *plane_loop;
   struct brw_instruction *plane_active;
   struct brw_instruction *vertex_loop;
   struct brw_instruction *next_test;
   struct brw_instruction *prev_test;
   
   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );
   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist));
   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));

   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );

   plane_loop = brw_DO(p, BRW_EXECUTE_1);
   {
      /* if (planemask & 1)
       */
      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
      
      plane_active = brw_IF(p, BRW_EXECUTE_1);
      {
	 /* vtxOut = freelist_ptr++ 
	  */
	 brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) );
	 brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));

	 if (c->key.nr_userclip)
	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
	 else
	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
	    
	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));

	 vertex_loop = brw_DO(p, BRW_EXECUTE_1);
	 {
	    /* vtx = *input_ptr;
	     */
	    brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));

	    /* IS_NEGATIVE(prev) */
	    brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
	    prev_test = brw_IF(p, BRW_EXECUTE_1);
	    {
	       /* IS_POSITIVE(next)
		*/
	       brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
	       next_test = brw_IF(p, BRW_EXECUTE_1);
	       {

		  /* Coming back in.
		   */
		  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
		  brw_math_invert(p, c->reg.t, c->reg.t);
		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);

		  /* If (vtxOut == 0) vtxOut = vtxPrev
		   */
		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);

		  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);

		  /* *outlist_ptr++ = vtxOut;
		   * nr_verts++; 
		   * vtxOut = 0;
		   */
		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
	       }
	       brw_ENDIF(p, next_test);
	       
	    }
	    prev_test = brw_ELSE(p, prev_test);
	    {
	       /* *outlist_ptr++ = vtxPrev;
		* nr_verts++;
		*/
	       brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
	       brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
	       brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));

	       /* IS_NEGATIVE(next)
		*/
	       brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
	       next_test = brw_IF(p, BRW_EXECUTE_1);
	       {
		  /* Going out of bounds.  Avoid division by zero as we
		   * know dp != dpPrev from DIFFERENT_SIGNS, above.
		   */
		  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
		  brw_math_invert(p, c->reg.t, c->reg.t);
		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);

		  /* If (vtxOut == 0) vtxOut = vtx
		   */
		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);

		  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);		  

		  /* *outlist_ptr++ = vtxOut;
		   * nr_verts++; 
		   * vtxOut = 0;
		   */
		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
	       } 	       
	       brw_ENDIF(p, next_test);
	    }
	    brw_ENDIF(p, prev_test);
	    
	    /* vtxPrev = vtx;
	     * inlist_ptr++;
	     */
	    brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
	    brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));

	    /* while (--loopcount != 0)
	     */
	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
	 } 
	 brw_WHILE(p, vertex_loop);

	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
	  * inlist = outlist
	  * inlist_ptr = &inlist[0]
	  * outlist_ptr = &outlist[0]
	  */
	 brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
	 brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
	 brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
      }
      brw_ENDIF(p, plane_active);
      
      /* plane_ptr++;
       */
      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));

      /* nr_verts >= 3 
       */
      brw_CMP(p,
	      vec1(brw_null_reg()),
	      BRW_CONDITIONAL_GE,
	      c->reg.nr_verts,
	      brw_imm_ud(3));
   
      /* && (planemask>>=1) != 0
       */
      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
   }
   brw_WHILE(p, plane_loop);
}
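
For reference, here is a minimal C sketch of the Sutherland-Hodgman step that the assembly above encodes; the vert type, the dot4() helper, and the caller-provided in/out buffers are illustrative assumptions, not driver code.

#include <stddef.h>

struct vert { float pos[4]; };                 /* stand-in for a clip vertex */

static float dot4(const float a[4], const float b[4])
{
   return a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
}

/* Clip the polygon 'in' (nr_in vertices) against one plane equation,
 * appending the surviving/interpolated vertices to 'out'.  This mirrors one
 * trip through vertex_loop above: t comes from the two dot products exactly
 * as the ADD/invert/MUL sequence computes it.
 */
static size_t clip_one_plane(const struct vert *in, size_t nr_in,
                             const float plane[4], struct vert *out)
{
   size_t nr_out = 0;
   const struct vert *prev = &in[nr_in - 1];

   for (size_t i = 0; i < nr_in; i++) {
      const struct vert *v = &in[i];
      float dp_prev = dot4(prev->pos, plane);
      float dp = dot4(v->pos, plane);

      if (dp_prev < 0.0f) {                    /* IS_NEGATIVE(prev) */
         if (dp >= 0.0f) {                     /* coming back in */
            float t = dp_prev / (dp_prev - dp);
            for (int c = 0; c < 4; c++)
               out[nr_out].pos[c] =
                  prev->pos[c] + t * (v->pos[c] - prev->pos[c]);
            nr_out++;
         }
      } else {
         out[nr_out++] = *prev;                /* prev is inside: keep it */
         if (dp < 0.0f) {                      /* going out */
            float t = dp / (dp - dp_prev);
            for (int c = 0; c < 4; c++)
               out[nr_out].pos[c] =
                  v->pos[c] + t * (prev->pos[c] - v->pos[c]);
            nr_out++;
         }
      }
      prev = v;
   }
   return nr_out;                              /* c->reg.nr_verts above */
}

Each plane_loop iteration above plays the role of one clip_one_plane() call, with the inlist and outlist swapped between iterations.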
Example 5
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      pc_coord_replace = calculate_point_sprite_mask(c, i);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
	 brw_set_predicate_control_flag_value(p, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
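      /* In plane-equation terms A(x, y) = Cx*x + Cy*y + C0: the code below
       * loads Cx = 1/PointWidth with C0 = 0 for the s coordinate, so s
       * sweeps 0..1 across the point; for a lower-left origin it loads
       * Cy = -1/PointWidth with C0 = 1 so that t runs 0..1 from the bottom
       * up instead.  (Inferred from the MOVs that follow.)
       */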
      if (pc_coord_replace) {
	 brw_set_predicate_control_flag_value(p, pc_coord_replace);
	 /* Calculate 1.0/PointWidth */
	 brw_math(&c->func,
		  c->tmp,
		  BRW_MATH_FUNCTION_INV,
		  BRW_MATH_SATURATE_NONE,
		  0,
		  c->dx0,
		  BRW_MATH_DATA_SCALAR,
		  BRW_MATH_PRECISION_FULL);

	 brw_set_access_mode(p, BRW_ALIGN_16);

	 /* dA/dx, dA/dy */
	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
	 } else {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
	 }

	 /* attribute constant offset */
	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
	 } else {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
	 }

	 brw_set_access_mode(p, BRW_ALIGN_1);
      }

      if (pc & ~pc_coord_replace) {
	 brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
	 brw_MOV(p, c->m3C0, a0); /* constant value */
      }


      brw_set_predicate_control_flag_value(p, pc);
      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
		    brw_null_reg(),
		    0,
		    brw_vec8_grf(0, 0),
		    0, 	/* allocate */
		    1,	/* used */
		    4, 	/* msg len */
		    0,	/* response len */
		    last, 	/* eot */
		    last, 	/* writes complete */
		    i*4,	/* urb destination offset */
		    BRW_URB_SWIZZLE_TRANSPOSE);
   }
}
Example 6
void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg ip = brw_ip_reg();
   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); 
   struct brw_reg primmask;
   int jmp;
   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   
   GLuint saveflag;

   c->nr_verts = 3;
   alloc_regs(c);

   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);

   brw_MOV(p, primmask, brw_imm_ud(1));
   brw_SHL(p, primmask, primmask, payload_prim);
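   /* primmask is now a one-hot encoding of the primitive topology id read
    * from the thread payload; each AND/JMPI pair below tests it against a
    * set of topologies and skips the corresponding setup subroutine when
    * none of them match.
    */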

   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
					       (1<<_3DPRIM_TRISTRIP) |
					       (1<<_3DPRIM_TRIFAN) |
					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
					       (1<<_3DPRIM_POLYGON) |
					       (1<<_3DPRIM_RECTLIST) |
					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
   {
      saveflag = p->flag_value;
      brw_push_insn_state(p); 
      brw_emit_tri_setup( c, false );
      brw_pop_insn_state(p);
      p->flag_value = saveflag;
      /* note - thread killed in subroutine, so must
       * restore the flag which is changed when building
       * the subroutine. fix #13240
       */
   }
   brw_land_fwd_jump(p, jmp);

   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
					       (1<<_3DPRIM_LINESTRIP) |
					       (1<<_3DPRIM_LINELOOP) |
					       (1<<_3DPRIM_LINESTRIP_CONT) |
					       (1<<_3DPRIM_LINESTRIP_BF) |
					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
   {
      saveflag = p->flag_value;
      brw_push_insn_state(p); 
      brw_emit_line_setup( c, false );
      brw_pop_insn_state(p);
      p->flag_value = saveflag;
      /* note - thread killed in subroutine */
   }
   brw_land_fwd_jump(p, jmp); 

   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
   {
      saveflag = p->flag_value;
      brw_push_insn_state(p); 
      brw_emit_point_sprite_setup( c, false );
      brw_pop_insn_state(p);
      p->flag_value = saveflag;
   }
   brw_land_fwd_jump(p, jmp); 

   brw_emit_point_setup( c, false );
}
Example 7
void brw_emit_unfilled_clip( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_instruction *do_clip;
   

   c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
			(c->key.fill_ccw != c->key.fill_cw) ||
			c->key.fill_ccw == CLIP_CULL ||
			c->key.fill_cw == CLIP_CULL ||
			c->key.copy_bfc_cw ||
			c->key.copy_bfc_ccw);

   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
   brw_clip_tri_init_vertices(c);

   assert(c->offset[VERT_RESULT_EDGE]);

   if (c->key.fill_ccw == CLIP_CULL &&
       c->key.fill_cw == CLIP_CULL) {
      brw_clip_kill_thread(c);
      return;
   }

   merge_edgeflags(c);

   /* Need to use the inlist indirection here: 
    */
   if (c->need_direction) 
      compute_tri_direction(c);
   
   if (c->key.fill_ccw == CLIP_CULL ||
       c->key.fill_cw == CLIP_CULL)
      cull_direction(c);

   if (c->key.offset_ccw ||
       c->key.offset_cw)
      compute_offset(c);

   if (c->key.copy_bfc_ccw ||
       c->key.copy_bfc_cw)
      copy_bfc(c);

   /* Need to do this whether we clip or not:
    */
   if (c->key.do_flat_shading)
      brw_clip_tri_flat_shade(c);
   
   brw_clip_init_clipmask(c);
   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
   do_clip = brw_IF(p, BRW_EXECUTE_1);
   {
      brw_clip_init_planes(c);
      brw_clip_tri(c);
      check_nr_verts(c);
   }
   brw_ENDIF(p, do_clip);
   
   emit_unfilled_primitives(c);
   brw_clip_kill_thread(c);
}
Example 8
void
vec4_gs_visitor::gs_emit_vertex(int stream_id)
{
   this->current_annotation = "emit vertex: safety check";

   /* Haswell and later hardware ignores the "Render Stream Select" bits
    * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled,
    * and instead sends all primitives down the pipeline for rasterization.
    * If the SOL stage is enabled, "Render Stream Select" is honored and
    * primitives bound to non-zero streams are discarded after stream output.
    *
    * Since the only purpose of primitives sent to non-zero streams is to
    * be recorded by transform feedback, we can simply discard all geometry
    * bound to these streams when transform feedback is disabled.
    */
   if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
      return;

   /* If we're outputting 32 control data bits or less, then we can wait
    * until the shader is over to output them all.  Otherwise we need to
    * output them as we go.  Now is the time to do it, since we're about to
    * output the vertex_count'th vertex, so it's guaranteed that the
    * control data bits associated with the (vertex_count - 1)th vertex are
    * correct.
    */
   if (c->control_data_header_size_bits > 32) {
      this->current_annotation = "emit vertex: emit control data bits";
      /* Only emit control data bits if we've finished accumulating a batch
       * of 32 bits.  This is the case when:
       *
       *     (vertex_count * bits_per_vertex) % 32 == 0
       *
       * (in other words, when the last 5 bits of vertex_count *
       * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
       * integer n (which is always the case, since bits_per_vertex is
       * always 1 or 2), this is equivalent to requiring that the last 5-n
       * bits of vertex_count are 0:
       *
       *     vertex_count & (2^(5-n) - 1) == 0
       *
       * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
       * equivalent to:
       *
       *     vertex_count & (32 / bits_per_vertex - 1) == 0
       */
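      /* Worked example: with bits_per_vertex == 2 the mask below is
       * 32/2 - 1 == 15, so the flush triggers every 16 vertices -- exactly
       * when 16 * 2 == 32 fresh control data bits have accumulated.
       */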
      vec4_instruction *inst =
         emit(AND(dst_null_ud(), this->vertex_count,
                  brw_imm_ud(32 / c->control_data_bits_per_vertex - 1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      emit(IF(BRW_PREDICATE_NORMAL));
      {
         /* If vertex_count is 0, then no control data bits have been
          * accumulated yet, so we skip emitting them.
          */
         emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
                  BRW_CONDITIONAL_NEQ));
         emit(IF(BRW_PREDICATE_NORMAL));
         emit_control_data_bits();
         emit(BRW_OPCODE_ENDIF);

         /* Reset control_data_bits to 0 so we can start accumulating a new
          * batch.
          *
          * Note: in the case where vertex_count == 0, this neutralizes the
          * effect of any call to EndPrimitive() that the shader may have
          * made before outputting its first vertex.
          */
         inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
         inst->force_writemask_all = true;
      }
      emit(BRW_OPCODE_ENDIF);
   }

   this->current_annotation = "emit vertex: vertex data";
   emit_vertex();

   /* In stream mode we have to set control data bits for all vertices
    * unless we have disabled control data bits completely (which we do
    * for GL_POINTS outputs that don't use streams).
    */
   if (c->control_data_header_size_bits > 0 &&
       gs_prog_data->control_data_format ==
          GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
       this->current_annotation = "emit vertex: Stream control data bits";
       set_stream_control_data_bits(stream_id);
   }

   this->current_annotation = NULL;
}
Example 9
/**
 * Write out a batch of 32 control data bits from the control_data_bits
 * register to the URB.
 *
 * The current value of the vertex_count register determines which DWORD in
 * the URB receives the control data bits.  The control_data_bits register is
 * assumed to contain the correct data for the vertex that was most recently
 * output, and all previous vertices that share the same DWORD.
 *
 * This function takes care of ensuring that if no vertices have been output
 * yet, no control bits are emitted.
 */
void
vec4_gs_visitor::emit_control_data_bits()
{
   assert(c->control_data_bits_per_vertex != 0);

   /* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized)
    * granularity, we need to use two tricks to ensure that the batch of 32
    * control data bits is written to the appropriate DWORD in the URB.  To
    * select which vec4 we are writing to, we use the "slot {0,1} offset"
    * fields of the message header.  To select which DWORD in the vec4 we are
    * writing to, we use the channel mask fields of the message header.  To
    * avoid penalizing geometry shaders that emit a small number of vertices
    * with extra bookkeeping, we only do each of these tricks when
    * c->prog_data.control_data_header_size_bits is large enough to make it
    * necessary.
    *
    * Note: this means that if we're outputting just a single DWORD of control
    * data bits, we'll actually replicate it four times since we won't do any
    * channel masking.  But that's not a problem since in this case the
    * hardware only pays attention to the first DWORD.
    */
   enum brw_urb_write_flags urb_write_flags = BRW_URB_WRITE_OWORD;
   if (c->control_data_header_size_bits > 32)
      urb_write_flags = urb_write_flags | BRW_URB_WRITE_USE_CHANNEL_MASKS;
   if (c->control_data_header_size_bits > 128)
      urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;
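   /* E.g. a shader emitting up to 96 vertices at 2 bits per vertex has a
    * 192-bit control data header: more than 32 bits, so channel masks are
    * needed, and more than 128 bits, so the per-slot offset is needed too.
    */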

   /* If we are using either channel masks or a per-slot offset, then we
    * need to figure out which DWORD we are trying to write to, using the
    * formula:
    *
    *     dword_index = (vertex_count - 1) * bits_per_vertex / 32
    *
    * Since bits_per_vertex is a power of two, and is known at compile
    * time, this can be optimized to:
    *
    *     dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
    */
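   /* Note that _mesa_fls() returns the 1-based position of the most
    * significant set bit, so for bits_per_vertex == 2 the shift below is
    * 6 - 2 == 4; e.g. vertex_count == 40 gives (40 - 1) >> 4 == 2, matching
    * (40 - 1) * 2 / 32 == 2 from the formula above.
    */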
   src_reg dword_index(this, glsl_type::uint_type);
   if (urb_write_flags) {
      src_reg prev_count(this, glsl_type::uint_type);
      emit(ADD(dst_reg(prev_count), this->vertex_count,
               brw_imm_ud(0xffffffffu)));
      unsigned log2_bits_per_vertex =
         _mesa_fls(c->control_data_bits_per_vertex);
      emit(SHR(dst_reg(dword_index), prev_count,
               brw_imm_ud(6 - log2_bits_per_vertex)));
   }

   /* Start building the URB write message.  The first MRF gets a copy of
    * R0.
    */
   int base_mrf = 1;
   dst_reg mrf_reg(MRF, base_mrf);
   src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   vec4_instruction *inst = emit(MOV(mrf_reg, r0));
   inst->force_writemask_all = true;

   if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
      /* Set the per-slot offset to dword_index / 4, so that we'll write to
       * the appropriate OWORD within the control data header.
       */
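      /* E.g. dword_index == 6 yields a per-slot offset of 6 / 4 == 1 (the
       * second OWORD) here, and below a channel mask of 1 << (6 % 4) == 4.
       */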
      src_reg per_slot_offset(this, glsl_type::uint_type);
      emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
      emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
           brw_imm_ud(1u));
   }

   if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
      /* Set the channel masks to 1 << (dword_index % 4), so that we'll
       * write to the appropriate DWORD within the OWORD.  We need to do
       * this computation with force_writemask_all, otherwise garbage data
       * from invocation 0 might clobber the mask for invocation 1 when
       * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
       * together.
       */
      src_reg channel(this, glsl_type::uint_type);
      inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
      inst->force_writemask_all = true;
      src_reg one(this, glsl_type::uint_type);
      inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
      inst->force_writemask_all = true;
      src_reg channel_mask(this, glsl_type::uint_type);
      inst = emit(SHL(dst_reg(channel_mask), one, channel));
      inst->force_writemask_all = true;
      emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
                                            channel_mask);
      emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
   }

   /* Store the control data bits in the message payload and send it. */
   dst_reg mrf_reg2(MRF, base_mrf + 1);
   inst = emit(MOV(mrf_reg2, this->control_data_bits));
   inst->force_writemask_all = true;
   inst = emit(GS_OPCODE_URB_WRITE);
   inst->urb_write_flags = urb_write_flags;
   /* We need to increment Global Offset by 256-bits to make room for
    * Broadwell's extra "Vertex Count" payload at the beginning of the
    * URB entry.  Since this is an OWord message, Global Offset is counted
    * in 128-bit units, so we must set it to 2.
    */
   if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
      inst->offset = 2;
   inst->base_mrf = base_mrf;
   inst->mlen = 2;
}
Example 10
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot)
{
   GLboolean need_stall = 0;
   
   if(writemask == 0) {
/*       _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }
   
   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
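   /* Worked example: writemask YZ is one contiguous run, so the code below
    * skips X (dst_offset = 2 registers), shrinks the response
    * (response_length = 2 * 2) and patches the message header channel mask;
    * no stall is needed.  Writemask XZ has a hole, so newmask (X only)
    * differs from writemask and the code falls back to the dependency
    * stall after the send.
    */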
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 need_stall = 1;
/* 	 _mesa_printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
	 
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 brw_MOV(p, m1, brw_vec8_grf(0,0));	 
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
	 dest = offset(dest, dst_offset);
	 response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditonalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length, 
			      msg_length,
			      eot);
   }

   if (need_stall)
   {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, reg, reg);	      
      brw_pop_insn_state(p);
   }

}
Example 11
void
vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_invocation_id:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD),
               invocation_id));
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TCS_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
               brw_imm_d(key->input_vertices)));
      break;
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
      src_reg vertex_index =
         vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0]))
                      : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset);
      break;
   }
   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;

         /* This is a read of gl_TessLevelInner[], which lives in the
          * Patch URB header.  The layout depends on the domain.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS: {
            /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
            dst_reg tmp(this, glsl_type::vec4_type);
            emit_output_urb_read(tmp, 0, src_reg());
            emit(MOV(writemask(dst, WRITEMASK_XY),
                     swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
            break;
         }
         case GL_TRIANGLES:
            /* DWord 4; use offset 1 but normal swizzle/writemask. */
            emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg());
            break;
         case GL_ISOLINES:
            /* All channels are undefined. */
            return;
         default:
            unreachable("Bogus tessellation domain");
         }
      } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;
         unsigned swiz = BRW_SWIZZLE_WZYX;

         /* This is a read of gl_TessLevelOuter[], which lives in the
          * high 4 DWords of the Patch URB header, in reverse order.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS:
            dst.writemask = WRITEMASK_XYZW;
            break;
         case GL_TRIANGLES:
            dst.writemask = WRITEMASK_XYZ;
            break;
         case GL_ISOLINES:
            /* Isolines are not reversed; swizzle .zw -> .xy */
            swiz = BRW_SWIZZLE_ZWZW;
            dst.writemask = WRITEMASK_XY;
            break;
         default:
            unreachable("Bogus tessellation domain");
         }

         dst_reg tmp(this, glsl_type::vec4_type);
         emit_output_urb_read(tmp, 1, src_reg());
         emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
      } else {
         emit_output_urb_read(dst, imm_offset, indirect_offset);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      src_reg value = get_nir_src(instr->src[0]);
      unsigned mask = instr->const_index[1];
      unsigned swiz = BRW_SWIZZLE_XYZW;

      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
         value.type = BRW_REGISTER_TYPE_F;

         mask &= (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;

         /* This is a write to gl_TessLevelInner[], which lives in the
          * Patch URB header.  The layout depends on the domain.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS:
            /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
             * We use an XXYX swizzle to put .xy, reversed, into the .wz
             * channels, and use a .zw writemask.
             */
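            /* E.g. for gl_TessLevelInner = vec2(a, b): the XXYX swizzle puts
             * a in .w and b in .z, so the .zw writemask stores b to DWord 2
             * and a to DWord 3 -- the reversed order the read path above
             * expects.
             */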
            swiz = BRW_SWIZZLE4(0, 0, 1, 0);
            mask = writemask_for_backwards_vector(mask);
            break;
         case GL_TRIANGLES:
            /* gl_TessLevelInner[].x lives at DWord 4, so we set the
             * writemask to X and bump the URB offset by 1.
             */
            imm_offset = 1;
            break;
         case GL_ISOLINES:
            /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
            return;
         default:
            unreachable("Bogus tessellation domain");
         }
      } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
         value.type = BRW_REGISTER_TYPE_F;

         mask &= (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;

         /* This is a write to gl_TessLevelOuter[] which lives in the
          * Patch URB Header at DWords 4-7.  However, it's reversed, so
          * instead of .xyzw we have .wzyx.
          */
         if (key->tes_primitive_mode == GL_ISOLINES) {
            /* Isolines .xy should be stored in .zw, in order. */
            swiz = BRW_SWIZZLE4(0, 0, 0, 1);
            mask <<= 2;
         } else {
            /* Other domains are reversed; store .wzyx instead of .xyzw. */
            swiz = BRW_SWIZZLE_WZYX;
            mask = writemask_for_backwards_vector(mask);
         }
      }

      emit_urb_write(swizzle(value, swiz), mask,
                     imm_offset, indirect_offset);
      break;
   }

   case nir_intrinsic_barrier: {
      dst_reg header = dst_reg(this, glsl_type::uvec4_type);
      emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
      emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}