Example #1
0
/**
 * Allocate registers for GS.
 *
 * If sol_program is true, then:
 *
 * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF
 *   1 needs to be set aside to hold the streamed vertex buffer indices.
 *
 * - The thread will need to use the destination_indices register.
 */
static void brw_gs_alloc_regs( struct brw_gs_compile *c,
			       GLuint nr_verts,
                               bool sol_program )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   /* Streamed vertex buffer indices */
   if (sol_program)
      c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);

   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
   c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);

   if (sol_program) {
      c->reg.destination_indices =
         retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
   }

   c->prog_data.urb_read_length = c->nr_regs; 
   c->prog_data.total_grf = i;
}
Example #2
0
void brw_NOP(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);   
   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src1(insn, brw_imm_ud(0x0));
}
Example #3
0
static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
{
   const struct gen_device_info *devinfo = c->func.devinfo;
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;


   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < 4; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   c->reg.t           = brw_vec1_grf(i, 0);
   c->reg.t0          = brw_vec1_grf(i, 1);
   c->reg.t1          = brw_vec1_grf(i, 2);
   c->reg.planemask   = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dp0         = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp1         = brw_vec1_grf(i, 4);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0);
      i++;
   }

   c->reg.vertex_src_mask = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
   c->reg.clipdistance_offset = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_W);
   i++;

   if (devinfo->gen == 5) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
Example #4
0
void brw_ENDIF(struct brw_compile *p, 
	       struct brw_instruction *patch_insn)
{
   GLuint br = 1;

   if (BRW_IS_IGDNG(p->brw))
      br = 2; 
 
   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(insn, brw_imm_d(0x0));

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      assert(patch_insn->bits3.if_else.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
	 /* Automagically turn it into an IFF:
	  */
	 patch_insn->header.opcode = BRW_OPCODE_IFF;
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 0;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 1;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else {
	 assert(0);
      }

      /* Also pop item off the stack in the endif instruction:
       */
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   }
}
Example #5
0
/**
 * Determine the register corresponding to the given vue slot
 */
static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
                                   struct brw_reg vert,
                                   int vue_slot)
{
   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
   GLuint sub = vue_slot % 2;

   return brw_vec4_grf(vert.nr + off, sub * 4);
}
Example #6
0
static struct brw_reg get_tmp( struct brw_clip_compile *c )
{
   struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);

   if (++c->last_tmp > c->prog_data.total_grf)
      c->prog_data.total_grf = c->last_tmp;

   return tmp;
}
Example #7
0
static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
                                    struct brw_reg vert,
                                    GLuint attr)
{
    GLuint off = c->attr_to_idx[attr] / 2;
    GLuint sub = c->attr_to_idx[attr] % 2;

    return brw_vec4_grf(vert.nr + off, sub * 4);
}
Example #8
0
/**
 * Determine the register corresponding to the given vert_result.
 */
static struct brw_reg get_vert_result(struct brw_sf_compile *c,
                                      struct brw_reg vert,
                                      GLuint vert_result)
{
   int vue_slot = c->vue_map.vert_result_to_slot[vert_result];
   assert (vue_slot >= c->urb_entry_read_offset);
   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
   GLuint sub = vue_slot % 2;

   return brw_vec4_grf(vert.nr + off, sub * 4);
}
Example #9
0
void
brw_blorp_const_color_program::alloc_regs()
{
   int reg = 0;
   this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
   this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);

   prog_data.first_curbe_grf = reg;
   clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
   reg += BRW_BLORP_NUM_PUSH_CONST_REGS;

   /* Make sure we didn't run out of registers */
   assert(reg <= GEN7_MRF_HACK_START);

   this->base_mrf = 2;
}
Example #10
0
static void brw_gs_alloc_regs( struct brw_gs_compile *c,
			       GLuint nr_verts )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   c->prog_data.urb_read_length = c->nr_regs; 
   c->prog_data.total_grf = i;
}
Example #11
0
void
vec4_gs_visitor::emit_prolog()
{
    /* In vertex shaders, r0.2 is guaranteed to be initialized to zero.  In
     * geometry shaders, it isn't (it contains a bunch of information we don't
     * need, like the input primitive type).  We need r0.2 to be zero in order
     * to build scratch read/write messages correctly (otherwise this value
     * will be interpreted as a global offset, causing us to do our scratch
     * reads/writes to garbage memory).  So just set it to zero at the top of
     * the shader.
     */
    this->current_annotation = "clear r0.2";
    dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
    vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u));
    inst->force_writemask_all = true;

    /* Create a virtual register to hold the vertex count */
    this->vertex_count = src_reg(this, glsl_type::uint_type);

    /* Initialize the vertex_count register to 0 */
    this->current_annotation = "initialize vertex_count";
    inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u)));
    inst->force_writemask_all = true;

    if (c->control_data_header_size_bits > 0) {
        /* Create a virtual register to hold the current set of control data
         * bits.
         */
        this->control_data_bits = src_reg(this, glsl_type::uint_type);

        /* If we're outputting more than 32 control data bits, then EmitVertex()
         * will set control_data_bits to 0 after emitting the first vertex.
         * Otherwise, we need to initialize it to 0 here.
         */
        if (c->control_data_header_size_bits <= 32) {
            this->current_annotation = "initialize control data bits";
            inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
            inst->force_writemask_all = true;
        }
    }

    this->current_annotation = NULL;
}
Example #12
0
void
vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   dst_reg dest;
   src_reg src;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input: {
      /* The EmitNoIndirectInput flag guarantees our vertex index will
       * be constant.  We should handle indirects someday.
       */
      nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
      nir_const_value *offset = nir_src_as_const_value(instr->src[1]);

      /* Make up a type...we have no way of knowing... */
      const glsl_type *const type = glsl_type::ivec(instr->num_components);

      src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
                          instr->const_index[0] + offset->u32[0],
                    type);
      /* gl_PointSize is passed in the .w component of the VUE header */
      if (instr->const_index[0] == VARYING_SLOT_PSIZ)
         src.swizzle = BRW_SWIZZLE_WWWW;

      dest = get_nir_dest(instr->dest, src.type);
      dest.writemask = brw_writemask_for_size(instr->num_components);
      emit(MOV(dest, src));
      break;
   }

   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should have produced per_vertex intrinsics");

   case nir_intrinsic_emit_vertex_with_counter: {
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      int stream_id = instr->const_index[0];
      gs_emit_vertex(stream_id);
      break;
   }

   case nir_intrinsic_end_primitive_with_counter:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      gs_end_primitive();
      break;

   case nir_intrinsic_set_vertex_count:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      break;

   case nir_intrinsic_load_primitive_id:
      assert(gs_prog_data->include_primitive_id);
      dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
      break;

   case nir_intrinsic_load_invocation_id: {
      src_reg invocation_id =
         src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
      assert(invocation_id.file != BAD_FILE);
      dest = get_nir_dest(instr->dest, invocation_id.type);
      emit(MOV(dest, invocation_id));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
			      GLuint nr_verts )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;


   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->key.nr_attrs & 1) {
      for (j = 0; j < 3; j++) {
	 GLuint delta = c->key.nr_attrs*16 + 32;

         if (c->chipset.is_igdng)
             delta = c->key.nr_attrs * 16 + 32 * 3;

	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (c->need_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
Example #14
0
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
			      GLuint nr_verts )
{
   struct intel_context *intel = &c->func.brw->intel;
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;


   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->vue_map.num_slots % 2) {
      /* The VUE has an odd number of slots so the last register is only half
       * used.  Fill the second half with zero.
       */
      for (j = 0; j < 3; j++) {
	 GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);

	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (intel->needs_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
Example #15
0
void
vec4_gs_visitor::emit_prolog()
{
   /* In vertex shaders, r0.2 is guaranteed to be initialized to zero.  In
    * geometry shaders, it isn't (it contains a bunch of information we don't
    * need, like the input primitive type).  We need r0.2 to be zero in order
    * to build scratch read/write messages correctly (otherwise this value
    * will be interpreted as a global offset, causing us to do our scratch
    * reads/writes to garbage memory).  So just set it to zero at the top of
    * the shader.
    */
   this->current_annotation = "clear r0.2";
   dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2_IMMED, r0, 0u);
   inst->force_writemask_all = true;

   /* Create a virtual register to hold the vertex count */
   this->vertex_count = src_reg(this, glsl_type::uint_type);

   /* Initialize the vertex_count register to 0 */
   this->current_annotation = "initialize vertex_count";
   inst = emit(MOV(dst_reg(this->vertex_count), 0u));
   inst->force_writemask_all = true;

   if (c->control_data_header_size_bits > 0) {
      /* Create a virtual register to hold the current set of control data
       * bits.
       */
      this->control_data_bits = src_reg(this, glsl_type::uint_type);

      /* If we're outputting more than 32 control data bits, then EmitVertex()
       * will set control_data_bits to 0 after emitting the first vertex.
       * Otherwise, we need to initialize it to 0 here.
       */
      if (c->control_data_header_size_bits <= 32) {
         this->current_annotation = "initialize control data bits";
         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
         inst->force_writemask_all = true;
      }
   }

   /* If the geometry shader uses the gl_PointSize input, we need to fix it up
    * to account for the fact that the vertex shader stored it in the w
    * component of VARYING_SLOT_PSIZ.
    */
   if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) {
      this->current_annotation = "swizzle gl_PointSize input";
      for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) {
         dst_reg dst(ATTR,
                     BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
         dst.type = BRW_REGISTER_TYPE_F;
         src_reg src(dst);
         dst.writemask = WRITEMASK_X;
         src.swizzle = BRW_SWIZZLE_WWWW;
         inst = emit(MOV(dst, src));

         /* In dual instanced dispatch mode, dst has a width of 4, so we need
          * to make sure the MOV happens regardless of which channels are
          * enabled.
          */
         inst->force_writemask_all = true;
      }
   }

   this->current_annotation = NULL;
}