Example #1
0
/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations in many places for
 * cached state, and instead emit pointers inside of large, mostly-static
 * state pools.  This comes at the expense of memory, and more expensive cache
 * misses.
 */
static int upload_state_base_address( struct brw_context *brw )
{
   /* Output the structure (brw_state_base_address) directly to the
    * batchbuffer, so we can emit relocations inline.
    */
   if (BRW_IS_IGDNG(brw)) {
       BEGIN_BATCH(8, IGNORE_CLIPRECTS);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
       OUT_BATCH(1); /* General state base address */
       OUT_BATCH(1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_BATCH(1); /* Instruction base address */
       OUT_BATCH(1); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
   } else {
       BEGIN_BATCH(6, IGNORE_CLIPRECTS);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
       OUT_BATCH(1); /* General state base address */
       OUT_BATCH(1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_BATCH(1); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       ADVANCE_BATCH();
   }
   return 0;
}
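/* Illustration (not part of the driver): the OUT_BATCH(1) dwords above
 * assume the STATE_BASE_ADDRESS layout in which bit 0 of each address dword
 * is a "modify enable" flag and the upper bits hold a 4KB-aligned address,
 * so a value of 1 programs a zero base address with modification enabled.
 * A hypothetical helper for packing a non-zero base address could look like:
 */
static GLuint
pack_state_base_address(GLuint addr_4k_aligned, GLuint modify_enable)
{
   /* Aligned address in the high bits, the enable flag in bit 0. */
   return (addr_4k_aligned & ~0xfffu) | (modify_enable & 1);
}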
Example #2
0
void brw_ENDIF(struct brw_compile *p, 
	       struct brw_instruction *patch_insn)
{
   GLuint br = 1;

   if (BRW_IS_IGDNG(p->brw))
      br = 2; 
 
   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(insn, brw_imm_d(0x0));

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      assert(patch_insn->bits3.if_else.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
	 /* Automagically turn it into an IFF:
	  */
	 patch_insn->header.opcode = BRW_OPCODE_IFF;
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 0;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 1;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else {
	 assert(0);
      }

      /* Also pop item off the stack in the endif instruction:
       */
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   }
}
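/* Worked example (assumption: EU instructions are 16 bytes, and IGDNG counts
 * jumps in half-instruction units, which is why br is doubled above).  If
 * the IF sits at store[10] and the ENDIF lands at store[13]:
 *
 *    pre-IGDNG:  jump_count = 1 * (13 - 10 + 1) = 4
 *    IGDNG:      jump_count = 2 * (13 - 10 + 1) = 8
 *
 * In single-program-flow mode the IF was emitted as an ADD to the IP
 * register, so the patch is a byte offset instead:
 *
 *    bits3.ud = (next - patch_insn) * 16,  e.g. (14 - 10) * 16 = 64 bytes
 */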
Example #3
0
static enum pipe_error
gs_unit_create_from_key(struct brw_context *brw,
                        struct brw_gs_unit_key *key,
                        struct brw_winsys_reloc *reloc,
                        unsigned nr_reloc,
                        struct brw_winsys_buffer **bo_out)
{
    struct brw_gs_unit_state gs;
    enum pipe_error ret;


    memset(&gs, 0, sizeof(gs));

    /* reloc */
    gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    gs.thread0.kernel_start_pointer = 0;

    gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    gs.thread1.single_program_flow = 1;

    gs.thread3.dispatch_grf_start_reg = 1;
    gs.thread3.const_urb_entry_read_offset = 0;
    gs.thread3.const_urb_entry_read_length = 0;
    gs.thread3.urb_entry_read_offset = 0;
    gs.thread3.urb_entry_read_length = key->urb_entry_read_length;

    gs.thread4.nr_urb_entries = key->nr_urb_entries;
    gs.thread4.urb_entry_allocation_size = key->urb_size - 1;

    if (key->nr_urb_entries >= 8)
        gs.thread4.max_threads = 1;
    else
        gs.thread4.max_threads = 0;

    if (BRW_IS_IGDNG(brw))
        gs.thread4.rendering_enable = 1;

    if (BRW_DEBUG & DEBUG_STATS)
        gs.thread4.stats_enable = 1;

    ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
                           key, sizeof(*key),
                           reloc, nr_reloc,
                           &gs, sizeof(gs),
                           NULL, NULL,
                           bo_out);
    if (ret)
        return ret;

    return PIPE_OK;
}
Example #4
0
/* FORWARD JUMPS:
 */
void brw_land_fwd_jump(struct brw_compile *p, 
		       struct brw_instruction *jmp_insn)
{
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   if (BRW_IS_IGDNG(p->brw))
       jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
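/* Worked example (illustrative only): the "- 1" accounts for JMPI's offset
 * being taken relative to the instruction following the JMPI.  With the
 * JMPI at store[20] and the landing pad at store[25]:
 *
 *    pre-IGDNG:  bits3.ud = 1 * ((25 - 20) - 1) = 4
 *    IGDNG:      bits3.ud = 2 * ((25 - 20) - 1) = 8
 */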
Example #5
0
static dri_bo *
vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
{
   struct brw_vs_unit_state vs;
   dri_bo *bo;
   int chipset_max_threads;

   memset(&vs, 0, sizeof(vs));

   vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
   vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
   vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
   /* Choosing multiple program flow means that we may get 2-vertex threads,
    * which will have the channel mask for dwords 4-7 enabled in the thread,
    * and those dwords will be written to the second URB handle when we
    * brw_urb_WRITE() results.
    */
   vs.thread1.single_program_flow = 0;

   if (BRW_IS_IGDNG(brw))
      vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
   else
      vs.thread1.binding_table_entry_count = key->nr_surfaces;

   vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
   vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
   vs.thread3.dispatch_grf_start_reg = 1;
   vs.thread3.urb_entry_read_offset = 0;
   vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;

   if (BRW_IS_IGDNG(brw)) {
      switch (key->nr_urb_entries) {
      case 8:
      case 12:
      case 16:
      case 32:
      case 64:
      case 96:
      case 128:
      case 168:
      case 192:
      case 224:
      case 256:
	 vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
	 break;
      default:
	 assert(0);
      }
   } else {
      switch (key->nr_urb_entries) {
Example #6
0
static dri_bo *
gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
{
   struct brw_gs_unit_state gs;
   dri_bo *bo;

   memset(&gs, 0, sizeof(gs));

   gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
   if (key->prog_active) /* reloc */
      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;

   gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
   gs.thread1.single_program_flow = 1;

   gs.thread3.dispatch_grf_start_reg = 1;
   gs.thread3.const_urb_entry_read_offset = 0;
   gs.thread3.const_urb_entry_read_length = 0;
   gs.thread3.urb_entry_read_offset = 0;
   gs.thread3.urb_entry_read_length = key->urb_entry_read_length;

   gs.thread4.nr_urb_entries = key->nr_urb_entries;
   gs.thread4.urb_entry_allocation_size = key->urb_size - 1;

   if (key->nr_urb_entries >= 8)
      gs.thread4.max_threads = 1;
   else
      gs.thread4.max_threads = 0;

   if (BRW_IS_IGDNG(brw))
      gs.thread4.rendering_enable = 1;

   if (INTEL_DEBUG & DEBUG_STATS)
      gs.thread4.stats_enable = 1;

   bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
			 key, sizeof(*key),
			 &brw->gs.prog_bo, 1,
			 &gs, sizeof(gs),
			 NULL, NULL);

   if (key->prog_active) {
      /* Emit GS program relocation */
      dri_bo_emit_reloc(bo,
			I915_GEM_DOMAIN_INSTRUCTION, 0,
			gs.thread0.grf_reg_count << 1,
			offsetof(struct brw_gs_unit_state, thread0),
			brw->gs.prog_bo);
   }
Example #7
0
static void brw_set_sampler_message(struct brw_context *brw,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLboolean eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   assert(eot == 0);
   brw_set_src1(insn, brw_imm_d(0));

   if (BRW_IS_IGDNG(brw)) {
      insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
      insn->bits3.sampler_igdng.sampler = sampler;
      insn->bits3.sampler_igdng.msg_type = msg_type;
      insn->bits3.sampler_igdng.simd_mode = simd_mode;
      insn->bits3.sampler_igdng.header_present = header_present;
      insn->bits3.sampler_igdng.response_length = response_length;
      insn->bits3.sampler_igdng.msg_length = msg_length;
      insn->bits3.sampler_igdng.end_of_thread = eot;
      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
      insn->bits2.send_igdng.end_of_thread = eot;
   } else if (BRW_IS_G4X(brw)) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
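/* Hypothetical usage sketch (constant names assumed to come from
 * brw_defines.h; the lengths and table indices are placeholder values):
 * configure a SIMD16 sample reading from binding table entry 0 with
 * sampler 0.
 */
static void
example_simd16_sample_message(struct brw_context *brw,
                              struct brw_instruction *insn)
{
   brw_set_sampler_message(brw, insn,
                           0,                                  /* binding_table_index */
                           0,                                  /* sampler */
                           BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE,  /* msg_type */
                           8,                                  /* response_length */
                           5,                                  /* msg_length */
                           GL_FALSE,                           /* eot */
                           1,                                  /* header_present */
                           BRW_SAMPLER_SIMD_MODE_SIMD16);      /* simd_mode */
}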
Example #8
0
struct brw_instruction *brw_ELSE(struct brw_compile *p, 
				 struct brw_instruction *if_insn)
{
   struct brw_instruction *insn;
   GLuint br = 1;

   if (BRW_IS_IGDNG(p->brw))
      br = 2;

   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
       insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
      if_insn->bits3.if_else.pop_count = 0;
      if_insn->bits3.if_else.pad0 = 0;
   }

   return insn;
}
Example #9
0
static void brw_set_urb_message( struct brw_context *brw,
				 struct brw_instruction *insn,
				 GLboolean allocate,
				 GLboolean used,
				 GLuint msg_length,
				 GLuint response_length,
				 GLboolean end_of_thread,
				 GLboolean complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
    brw_set_src1(insn, brw_imm_d(0));

    if (BRW_IS_IGDNG(brw)) {
        insn->bits3.urb_igdng.opcode = 0;	/* ? */
        insn->bits3.urb_igdng.offset = offset;
        insn->bits3.urb_igdng.swizzle_control = swizzle_control;
        insn->bits3.urb_igdng.allocate = allocate;
        insn->bits3.urb_igdng.used = used;	/* ? */
        insn->bits3.urb_igdng.complete = complete;
        insn->bits3.urb_igdng.header_present = 1;
        insn->bits3.urb_igdng.response_length = response_length;
        insn->bits3.urb_igdng.msg_length = msg_length;
        insn->bits3.urb_igdng.end_of_thread = end_of_thread;
        insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
        insn->bits2.send_igdng.end_of_thread = end_of_thread;
    } else {
        insn->bits3.urb.opcode = 0;	/* ? */
        insn->bits3.urb.offset = offset;
        insn->bits3.urb.swizzle_control = swizzle_control;
        insn->bits3.urb.allocate = allocate;
        insn->bits3.urb.used = used;	/* ? */
        insn->bits3.urb.complete = complete;
        insn->bits3.urb.response_length = response_length;
        insn->bits3.urb.msg_length = msg_length;
        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
        insn->bits3.urb.end_of_thread = end_of_thread;
    }
}
Example #10
0
struct brw_instruction *brw_WHILE(struct brw_compile *p, 
                                  struct brw_instruction *do_insn)
{
   struct brw_instruction *insn;
   GLuint br = 1;

   if (BRW_IS_IGDNG(p->brw))
      br = 2;

   if (p->single_program_flow)
      insn = next_insn(p, BRW_OPCODE_ADD);
   else
      insn = next_insn(p, BRW_OPCODE_WHILE);

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (p->single_program_flow) {
      insn->header.execution_size = BRW_EXECUTE_1;

      insn->bits3.d = (do_insn - insn) * 16;
   } else {
      insn->header.execution_size = do_insn->header.execution_size;

      assert(do_insn->header.opcode == BRW_OPCODE_DO);
      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
      insn->bits3.if_else.pop_count = 0;
      insn->bits3.if_else.pad0 = 0;
   }

/*    insn->header.mask_control = BRW_MASK_ENABLE; */

   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;   
   return insn;
}
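/* Worked example (illustrative only): the WHILE jump is backwards, so the
 * count is negative.  With the DO at store[30] and the WHILE emitted at
 * store[40]:
 *
 *    pre-IGDNG:  jump_count = 1 * (30 - 40 + 1) = -9
 *    IGDNG:      jump_count = 2 * (30 - 40 + 1) = -18
 *
 * In single-program-flow mode the loop is closed with an ADD to IP using a
 * byte offset instead:  bits3.d = (30 - 40) * 16 = -160.
 */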
Example #11
0
static void brw_set_math_message( struct brw_context *brw,
				  struct brw_instruction *insn,
				  GLuint msg_length,
				  GLuint response_length,
				  GLuint function,
				  GLuint integer_type,
				  GLboolean low_precision,
				  GLboolean saturate,
				  GLuint dataType )
{
   brw_set_src1(insn, brw_imm_d(0));

   if (BRW_IS_IGDNG(brw)) {
       insn->bits3.math_igdng.function = function;
       insn->bits3.math_igdng.int_type = integer_type;
       insn->bits3.math_igdng.precision = low_precision;
       insn->bits3.math_igdng.saturate = saturate;
       insn->bits3.math_igdng.data_type = dataType;
       insn->bits3.math_igdng.snapshot = 0;
       insn->bits3.math_igdng.header_present = 0;
       insn->bits3.math_igdng.response_length = response_length;
       insn->bits3.math_igdng.msg_length = msg_length;
       insn->bits3.math_igdng.end_of_thread = 0;
       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
       insn->bits2.send_igdng.end_of_thread = 0;
   } else {
       insn->bits3.math.function = function;
       insn->bits3.math.int_type = integer_type;
       insn->bits3.math.precision = low_precision;
       insn->bits3.math.saturate = saturate;
       insn->bits3.math.data_type = dataType;
       insn->bits3.math.response_length = response_length;
       insn->bits3.math.msg_length = msg_length;
       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
       insn->bits3.math.end_of_thread = 0;
   }
}
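/* Hypothetical usage sketch (constant names assumed to come from
 * brw_defines.h; the lengths are placeholder values): configure a
 * full-precision reciprocal on a vector operand, no saturation.
 */
static void
example_math_inv_message(struct brw_context *brw,
                         struct brw_instruction *insn)
{
   brw_set_math_message(brw, insn,
                        1,                          /* msg_length */
                        1,                          /* response_length */
                        BRW_MATH_FUNCTION_INV,      /* function */
                        BRW_MATH_INTEGER_UNSIGNED,  /* integer_type */
                        BRW_MATH_PRECISION_FULL,    /* low_precision */
                        BRW_MATH_SATURATE_NONE,     /* saturate */
                        BRW_MATH_DATA_VECTOR);      /* dataType */
}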
Example #12
0
static void brw_set_dp_write_message( struct brw_context *brw,
				      struct brw_instruction *insn,
				      GLuint binding_table_index,
				      GLuint msg_control,
				      GLuint msg_type,
				      GLuint msg_length,
				      GLuint pixel_scoreboard_clear,
				      GLuint response_length,
				      GLuint end_of_thread )
{
   brw_set_src1(insn, brw_imm_d(0));

   if (BRW_IS_IGDNG(brw)) {
       insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
       insn->bits3.dp_write_igdng.msg_control = msg_control;
       insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
       insn->bits3.dp_write_igdng.msg_type = msg_type;
       insn->bits3.dp_write_igdng.send_commit_msg = 0;
       insn->bits3.dp_write_igdng.header_present = 1;
       insn->bits3.dp_write_igdng.response_length = response_length;
       insn->bits3.dp_write_igdng.msg_length = msg_length;
       insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
       insn->bits2.send_igdng.end_of_thread = end_of_thread;
   } else {
       insn->bits3.dp_write.binding_table_index = binding_table_index;
       insn->bits3.dp_write.msg_control = msg_control;
       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
       insn->bits3.dp_write.msg_type = msg_type;
       insn->bits3.dp_write.send_commit_msg = 0;
       insn->bits3.dp_write.response_length = response_length;
       insn->bits3.dp_write.msg_length = msg_length;
       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
       insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
Example #13
0
static void brw_set_dp_read_message( struct brw_context *brw,
				      struct brw_instruction *insn,
				      GLuint binding_table_index,
				      GLuint msg_control,
				      GLuint msg_type,
				      GLuint target_cache,
				      GLuint msg_length,
				      GLuint response_length,
				      GLuint end_of_thread )
{
   brw_set_src1(insn, brw_imm_d(0));

   if (BRW_IS_IGDNG(brw)) {
       insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
       insn->bits3.dp_read_igdng.msg_control = msg_control;
       insn->bits3.dp_read_igdng.msg_type = msg_type;
       insn->bits3.dp_read_igdng.target_cache = target_cache;
       insn->bits3.dp_read_igdng.header_present = 1;
       insn->bits3.dp_read_igdng.response_length = response_length;
       insn->bits3.dp_read_igdng.msg_length = msg_length;
       insn->bits3.dp_read_igdng.pad1 = 0;
       insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
       insn->bits2.send_igdng.end_of_thread = end_of_thread;
   } else {
       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
       insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
   }
}
Example #14
0
/* Interpolate between two vertices and put the result into a0.0.  
 * Increment a0.0 accordingly.
 */
void brw_clip_interp_vertex( struct brw_clip_compile *c,
			     struct brw_indirect dest_ptr,
			     struct brw_indirect v0_ptr, /* from */
			     struct brw_indirect v1_ptr, /* to */
			     struct brw_reg t0,
			     GLboolean force_edgeflag)
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp = get_tmp(c);
   GLuint i;

   /* Just copy the vertex header:
    */
   /*
    * After the CLIP stage, only the first 256 bits of the VUE are read
    * back on IGDNG, so this needs no change there.
    */
   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
      
   /* Iterate over each attribute (could be done in pairs?)
    */
   for (i = 0; i < c->nr_attrs; i++) {
      GLuint delta = i*16 + 32;

      if (BRW_IS_IGDNG(p->brw))
          delta = i * 16 + 32 * 3;

      if (delta == c->offset[VERT_RESULT_EDGE]) {
	 if (force_edgeflag) 
	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
	 else
	    brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
      }
      else {
	 /* Interpolate: 
	  *
	  *        New = attr0 + t*attr1 - t*attr0
	  */
	 brw_MUL(p, 
		 vec4(brw_null_reg()),
		 deref_4f(v1_ptr, delta),
		 t0);

	 brw_MAC(p, 
		 tmp,	      
		 negate(deref_4f(v0_ptr, delta)),
		 t0); 
	      
	 brw_ADD(p,
		 deref_4f(dest_ptr, delta), 
		 deref_4f(v0_ptr, delta),
		 tmp);
      }
   }

   if (i & 1) {
      GLuint delta = i*16 + 32;

      if (BRW_IS_IGDNG(p->brw))
          delta = i * 16 + 32 * 3;

      brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
   }

   release_tmp(c, tmp);

   /* Recreate the projected (NDC) coordinate in the new vertex
    * header:
    */
   brw_clip_project_vertex(c, dest_ptr );
}
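/* Scalar sketch of the interpolation above (illustrative only): the MUL
 * writes v1*t0 through the null register into the accumulator, the MAC
 * then accumulates -v0*t0 into tmp, and the ADD produces the result:
 *
 *    new = v0 + (t*v1 - t*v0)  ==  v0 + t*(v1 - v0)
 *
 * so t0 = 0 reproduces v0 and t0 = 1 reproduces v1 for each dword.
 */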
Example #15
0
static enum pipe_error
sf_unit_create_from_key(struct brw_context *brw,
                        struct brw_sf_unit_key *key,
                        struct brw_winsys_reloc *reloc,
                        struct brw_winsys_buffer **bo_out)
{
   struct brw_sf_unit_state sf;
   enum pipe_error ret;
   int chipset_max_threads;
   memset(&sf, 0, sizeof(sf));

   sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
   /* reloc */
   sf.thread0.kernel_start_pointer = 0;

   sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;

   sf.thread3.dispatch_grf_start_reg = 3;

   if (BRW_IS_IGDNG(brw))
       sf.thread3.urb_entry_read_offset = 3;
   else
       sf.thread3.urb_entry_read_offset = 1;

   sf.thread3.urb_entry_read_length = key->urb_entry_read_length;

   sf.thread4.nr_urb_entries = key->nr_urb_entries;
   sf.thread4.urb_entry_allocation_size = key->sfsize - 1;

   /* Each SF thread produces 1 PUE, and there can be up to 24 (pre-IGDNG)
    * or 48 (IGDNG) threads.
    */
   if (BRW_IS_IGDNG(brw))
      chipset_max_threads = 48;
   else
      chipset_max_threads = 24;

   sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;

   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
      sf.thread4.max_threads = 0;

   if (BRW_DEBUG & DEBUG_STATS)
      sf.thread4.stats_enable = 1;

   /* CACHE_NEW_SF_VP */
   /* reloc */
   sf.sf5.sf_viewport_state_offset = 0;

   sf.sf5.viewport_transform = 1;

   if (key->scissor)
      sf.sf6.scissor = 1;

   if (key->front_ccw)
      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
   else
      sf.sf5.front_winding = BRW_FRONTWINDING_CW;

   switch (key->cull_face) {
   case PIPE_FACE_FRONT:
      sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
      break;
   case PIPE_FACE_BACK:
      sf.sf6.cull_mode = BRW_CULLMODE_BACK;
      break;
   case PIPE_FACE_FRONT_AND_BACK:
      sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
      break;
   case PIPE_FACE_NONE:
      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
      break;
   default:
      assert(0);
      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
      break;
   }

   /* _NEW_LINE */
   /* XXX use ctx->Const.Min/MaxLineWidth here */
   sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);

   sf.sf6.line_endcap_aa_region_width = 1;
   if (key->line_smooth)
      sf.sf6.aa_enable = 1;
   else if (sf.sf6.line_width <= 0x2)
       sf.sf6.line_width = 0;

   /* XXX: gl_rasterization_rules?  something else?
    */
   sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;

   /* XXX clamp max depends on AA vs. non-AA */

   /* _NEW_POINT */
   sf.sf7.sprite_point = key->point_sprite;
   sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
   sf.sf7.use_point_size_state = !key->point_attenuated;
   sf.sf7.aa_line_distance_mode = 0;

   /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
    */
   if (!key->flatshade_first) {
      sf.sf7.trifan_pv = 2;
      sf.sf7.linestrip_pv = 1;
      sf.sf7.tristrip_pv = 2;
   } else {
      sf.sf7.trifan_pv = 1;
      sf.sf7.linestrip_pv = 0;
      sf.sf7.tristrip_pv = 0;
   }

   sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable;

   /* Set bias for OpenGL rasterization rules:
    */
   if (key->gl_rasterization_rules) {
      sf.sf6.dest_org_vbias = 0x8;
      sf.sf6.dest_org_hbias = 0x8;
   }
   else {
      sf.sf6.dest_org_vbias = 0x0;
      sf.sf6.dest_org_hbias = 0x0;
   }

   ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
                          key, sizeof(*key),
                          reloc, 2,
                          &sf, sizeof(sf),
                          NULL, NULL,
                          bo_out);
   if (ret)
      return ret;

   
   return PIPE_OK;
}
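/* Worked example (assumption: the hardware line width and point size fields
 * are unsigned fixed-point with one and three fractional bits respectively,
 * which is what the (1<<1) and (1<<3) scales suggest):
 *
 *    line_width = 2.5  ->  CLAMP(2.5, 1.0, 5.0) * 2 = 5        (2.5 in U.1)
 *    point_size = 4.0  ->  CLAMP(rint(4.0), 1, 255) * 8 = 32   (4.0 in U.3)
 */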
Example #16
0
static void brw_translate_vertex_elements(struct brw_context *brw,
                                          struct brw_vertex_element_packet *brw_velems,
                                          const struct pipe_vertex_element *attribs,
                                          unsigned count)
{
   unsigned i;

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but they don't do anything unless
    * a VE loads from them.
    */
   brw_velems->header.opcode = CMD_VERTEX_ELEMENT;

   if (count == 0) {
      brw_velems->header.length = 1;
      brw_velems->ve[0].ve0.src_offset = 0;
      brw_velems->ve[0].ve0.src_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      brw_velems->ve[0].ve0.valid = 1;
      brw_velems->ve[0].ve0.vertex_buffer_index = 0;
      brw_velems->ve[0].ve1.dst_offset = 0;
      brw_velems->ve[0].ve1.vfcomponent0 = BRW_VE1_COMPONENT_STORE_0;
      brw_velems->ve[0].ve1.vfcomponent1 = BRW_VE1_COMPONENT_STORE_0;
      brw_velems->ve[0].ve1.vfcomponent2 = BRW_VE1_COMPONENT_STORE_0;
      brw_velems->ve[0].ve1.vfcomponent3 = BRW_VE1_COMPONENT_STORE_1_FLT;
      return;
   }


   /* Now emit vertex element (VEP) state packets.
    *
    */
   brw_velems->header.length = (1 + count * 2) - 2;
   for (i = 0; i < count; i++) {
      const struct pipe_vertex_element *input = &attribs[i];
      unsigned nr_components = util_format_get_nr_components(input->src_format);

      uint32_t format = brw_translate_surface_format( input->src_format );
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      switch (nr_components) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
         break;
      }

      brw_velems->ve[i].ve0.src_offset = input->src_offset;
      brw_velems->ve[i].ve0.src_format = format;
      brw_velems->ve[i].ve0.valid = 1;
      brw_velems->ve[i].ve0.vertex_buffer_index = input->vertex_buffer_index;
      brw_velems->ve[i].ve1.vfcomponent0 = comp0;
      brw_velems->ve[i].ve1.vfcomponent1 = comp1;
      brw_velems->ve[i].ve1.vfcomponent2 = comp2;
      brw_velems->ve[i].ve1.vfcomponent3 = comp3;

      if (BRW_IS_IGDNG(brw))
         brw_velems->ve[i].ve1.dst_offset = 0;
      else
         brw_velems->ve[i].ve1.dst_offset = i * 4;
   }
}
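/* Worked example (illustrative only): the fallthrough switch above pads the
 * destination out to four components.  For a two-component input such as
 * PIPE_FORMAT_R32G32_FLOAT (nr_components == 2):
 *
 *    comp0 = STORE_SRC, comp1 = STORE_SRC,
 *    comp2 = STORE_0,   comp3 = STORE_1_FLT
 *
 * so the vertex fetcher writes (x, y, 0.0, 1.0) into the URB entry.
 */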
Example #17
0
static int upload_invarient_state( struct brw_context *brw )
{
   {
      /* 0x61040000  Pipeline Select */
      /*     PipelineSelect            : 0 */
      struct brw_pipeline_select ps;

      memset(&ps, 0, sizeof(ps));
      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
	 ps.header.opcode = CMD_PIPELINE_SELECT_GM45;
      else
	 ps.header.opcode = CMD_PIPELINE_SELECT_965;
      ps.header.pipeline_select = 0;
      BRW_BATCH_STRUCT(brw, &ps);
   }

   {
      struct brw_global_depth_offset_clamp gdo;
      memset(&gdo, 0, sizeof(gdo));

      /* Disable depth offset clamping. 
       */
      gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
      gdo.header.length = sizeof(gdo)/4 - 2;
      gdo.depth_offset_clamp = 0.0;

      BRW_BATCH_STRUCT(brw, &gdo);
   }


   /* 0x61020000  State Instruction Pointer */
   {
      struct brw_system_instruction_pointer sip;
      memset(&sip, 0, sizeof(sip));

      sip.header.opcode = CMD_STATE_INSN_POINTER;
      sip.header.length = 0;
      sip.bits0.pad = 0;
      sip.bits0.system_instruction_pointer = 0;
      BRW_BATCH_STRUCT(brw, &sip);
   }

   /* VF Statistics */
   {
      struct brw_vf_statistics vfs;
      memset(&vfs, 0, sizeof(vfs));

      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) 
	 vfs.opcode = CMD_VF_STATISTICS_GM45;
      else 
	 vfs.opcode = CMD_VF_STATISTICS_965;

      if (BRW_DEBUG & DEBUG_STATS)
	 vfs.statistics_enable = 1; 

      BRW_BATCH_STRUCT(brw, &vfs);
   }
   
   if (!BRW_IS_965(brw))
   {
      struct brw_aa_line_parameters balp;

      /* use legacy aa line coverage computation */
      memset(&balp, 0, sizeof(balp));
      balp.header.opcode = CMD_AA_LINE_PARAMETERS;
      balp.header.length = sizeof(balp) / 4 - 2;
   
      BRW_BATCH_STRUCT(brw, &balp);
   }

   {
      struct brw_polygon_stipple_offset bpso;
      
      /* This is invariant state in gallium:
       */
      memset(&bpso, 0, sizeof(bpso));
      bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
      bpso.header.length = sizeof(bpso)/4-2;
      bpso.bits0.y_offset = 0;
      bpso.bits0.x_offset = 0;

      BRW_BATCH_STRUCT(brw, &bpso);
   }
   
   return 0;
}
Example #18
0
static int emit_depthbuffer(struct brw_context *brw)
{
   struct pipe_surface *surface = brw->curr.fb.zsbuf;
   unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;

   if (surface == NULL) {
      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
		(BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
         OUT_BATCH(0);

      ADVANCE_BATCH();
   } else {
      struct brw_winsys_buffer *bo;
      unsigned int format;
      unsigned int pitch;
      unsigned int cpp;

      switch (surface->format) {
      case PIPE_FORMAT_Z16_UNORM:
	 format = BRW_DEPTHFORMAT_D16_UNORM;
	 cpp = 2;
	 break;
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z24S8_UNORM:
	 format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
	 cpp = 4;
	 break;
      case PIPE_FORMAT_Z32_FLOAT:
	 format = BRW_DEPTHFORMAT_D32_FLOAT;
	 cpp = 4;
	 break;
      default:
	 assert(0);
	 return PIPE_ERROR_BAD_INPUT;
      }

      bo = brw_surface(surface)->bo;
      pitch = brw_surface(surface)->pitch;

      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH(((pitch * cpp) - 1) |
		(format << 18) |
		(BRW_TILEWALK_YMAJOR << 26) |
		((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |
		(BRW_SURFACE_2D << 29));
      OUT_RELOC(bo,
		BRW_USAGE_DEPTH_BUFFER,
		surface->offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
		((pitch - 1) << 6) |
		((surface->height - 1) << 19));
      OUT_BATCH(0);

      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
         OUT_BATCH(0);

      ADVANCE_BATCH();
   }

   return 0;
}
Example #19
0
static enum pipe_error
clip_unit_create_from_key(struct brw_context *brw,
                          struct brw_clip_unit_key *key,
                          struct brw_winsys_reloc *reloc,
                          struct brw_winsys_buffer **bo_out)
{
   struct brw_clip_unit_state clip;
   enum pipe_error ret;

   memset(&clip, 0, sizeof(clip));

   clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
   /* reloc */
   clip.thread0.kernel_start_pointer = 0;

   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
   clip.thread1.single_program_flow = 1;

   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
   clip.thread3.dispatch_grf_start_reg = 1;
   clip.thread3.urb_entry_read_offset = 0;

   clip.thread4.nr_urb_entries = key->nr_urb_entries;
   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
   /* If we have enough clip URB entries to run two threads, do so.
    */
   if (key->nr_urb_entries >= 10) {
      /* Half of the URB entries go to each thread, and it has to be an
       * even number.
       */
      assert(key->nr_urb_entries % 2 == 0);
      
      /* Although up to 16 concurrent Clip threads are allowed on IGDNG, 
       * only 2 threads can output VUEs at a time.
       */
      if (BRW_IS_IGDNG(brw))
         clip.thread4.max_threads = 16 - 1;        
      else
         clip.thread4.max_threads = 2 - 1;
   } else {
      assert(key->nr_urb_entries >= 5);
      clip.thread4.max_threads = 1 - 1;
   }

   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
      clip.thread4.max_threads = 0;

   if (BRW_DEBUG & DEBUG_STATS)
      clip.thread4.stats_enable = 1;

   clip.clip5.userclip_enable_flags = 0x7f;
   clip.clip5.userclip_must_clip = 1;
   clip.clip5.guard_band_enable = 0;
   if (!key->depth_clamp)
      clip.clip5.viewport_z_clip_enable = 1;
   clip.clip5.viewport_xy_clip_enable = 1;
   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
   clip.clip5.api_mode = BRW_CLIP_API_OGL;
   clip.clip5.clip_mode = key->clip_mode;

   if (BRW_IS_G4X(brw))
      clip.clip5.negative_w_clip_test = 1;

   clip.clip6.clipper_viewport_state_ptr = 0;
   clip.viewport_xmin = -1;
   clip.viewport_xmax = 1;
   clip.viewport_ymin = -1;
   clip.viewport_ymax = 1;

   ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
                          key, sizeof(*key),
                          reloc, 1,
                          &clip, sizeof(clip),
                          NULL, NULL,
                          bo_out);
   if (ret)
      return ret;

   return PIPE_OK;
}
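/* Worked example (illustrative only, derived from the code above):
 * max_threads holds the thread count minus one.  With nr_urb_entries == 12,
 * two threads run and each owns half of the URB entries (max_threads = 1
 * pre-IGDNG, or 15 on IGDNG, where up to 16 threads may run but only two
 * emit VUEs at a time).  With nr_urb_entries == 6 the unit falls back to a
 * single thread (max_threads = 0).
 */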
Example #20
0
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
			      GLuint nr_verts )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;


   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->nr_attrs & 1) {
      for (j = 0; j < 3; j++) {
	 GLuint delta = c->nr_attrs*16 + 32;

         if (BRW_IS_IGDNG(c->func.brw))
             delta = c->nr_attrs * 16 + 32 * 3;

	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (c->need_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
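/* Worked example (assumption: the vertex header occupies 32 bytes pre-IGDNG
 * and 96 bytes on IGDNG, and each attribute is one 16-byte vec4, which is
 * what the two delta formulas encode).  For attribute i == 2:
 *
 *    pre-IGDNG:  delta = 2*16 + 32     = 64  bytes into the vertex
 *    IGDNG:      delta = 2*16 + 32 * 3 = 128 bytes into the vertex
 *
 * The same formula appears in brw_clip_interp_vertex() above when zeroing
 * the pad slot emitted for an odd attribute count.
 */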