/** * Define the base addresses which some state is referenced from. * * This allows us to avoid having to emit relocations in many places for * cached state, and instead emit pointers inside of large, mostly-static * state pools. This comes at the expense of memory, and more expensive cache * misses. */ static int upload_state_base_address( struct brw_context *brw ) { /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. */ if (BRW_IS_IGDNG(brw)) { BEGIN_BATCH(8, IGNORE_CLIPRECTS); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ OUT_BATCH(1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* Instruction base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ OUT_BATCH(1); /* Instruction access upper bound */ ADVANCE_BATCH(); } else { BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ OUT_BATCH(1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ ADVANCE_BATCH(); } return 0; }
/* Close an IF/ELSE block: emit (or elide) the ENDIF and back-patch the
 * matching IF or ELSE instruction's jump target.
 *
 * `patch_insn` is the IF (or ELSE) instruction recorded when the block
 * was opened.  Jump counts are in instruction units; IGDNG encodes them
 * in 64-bit halves, hence br = 2 there.
 */
void brw_ENDIF(struct brw_compile *p,
               struct brw_instruction *patch_insn)
{
   GLuint br = 1;

   if (BRW_IS_IGDNG(p->brw))
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      /* The "IF" was emitted as an ADD to the IP register; patch its
       * immediate to skip to the instruction after the block.  The *16
       * converts an instruction count to bytes (128-bit instructions).
       */
      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(insn, brw_imm_d(0x0));

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      /* The IF/ELSE must not have been patched already. */
      assert(patch_insn->bits3.if_else.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
	 /* Automagically turn it into an IFF:
	  */
	 patch_insn->header.opcode = BRW_OPCODE_IFF;
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 0;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 1;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else {
	 assert(0);
      }

      /* Also pop item off the stack in the endif instruction:
       */
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   }
}
static enum pipe_error gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key, struct brw_winsys_reloc *reloc, unsigned nr_reloc, struct brw_winsys_buffer **bo_out) { struct brw_gs_unit_state gs; enum pipe_error ret; memset(&gs, 0, sizeof(gs)); /* reloc */ gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; gs.thread0.kernel_start_pointer = 0; gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs.thread1.single_program_flow = 1; gs.thread3.dispatch_grf_start_reg = 1; gs.thread3.const_urb_entry_read_offset = 0; gs.thread3.const_urb_entry_read_length = 0; gs.thread3.urb_entry_read_offset = 0; gs.thread3.urb_entry_read_length = key->urb_entry_read_length; gs.thread4.nr_urb_entries = key->nr_urb_entries; gs.thread4.urb_entry_allocation_size = key->urb_size - 1; if (key->nr_urb_entries >= 8) gs.thread4.max_threads = 1; else gs.thread4.max_threads = 0; if (BRW_IS_IGDNG(brw)) gs.thread4.rendering_enable = 1; if (BRW_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT, key, sizeof(*key), reloc, nr_reloc, &gs, sizeof(gs), NULL, NULL, bo_out); if (ret) return ret; return PIPE_OK; }
/* FORWARD JUMPS: */ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *jmp_insn) { struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; if (BRW_IS_IGDNG(p->brw)) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); }
static dri_bo * vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) { struct brw_vs_unit_state vs; dri_bo *bo; int chipset_max_threads; memset(&vs, 0, sizeof(vs)); vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, * and those dwords will be written to the second URB handle when we * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; if (BRW_IS_IGDNG(brw)) vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ else vs.thread1.binding_table_entry_count = key->nr_surfaces; vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; if (BRW_IS_IGDNG(brw)) { switch (key->nr_urb_entries) { case 8: case 12: case 16: case 32: case 64: case 96: case 128: case 168: case 192: case 224: case 256: vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; break; default: assert(0); } } else { switch (key->nr_urb_entries) {
static dri_bo * gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) { struct brw_gs_unit_state gs; dri_bo *bo; memset(&gs, 0, sizeof(gs)); gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; if (key->prog_active) /* reloc */ gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs.thread1.single_program_flow = 1; gs.thread3.dispatch_grf_start_reg = 1; gs.thread3.const_urb_entry_read_offset = 0; gs.thread3.const_urb_entry_read_length = 0; gs.thread3.urb_entry_read_offset = 0; gs.thread3.urb_entry_read_length = key->urb_entry_read_length; gs.thread4.nr_urb_entries = key->nr_urb_entries; gs.thread4.urb_entry_allocation_size = key->urb_size - 1; if (key->nr_urb_entries >= 8) gs.thread4.max_threads = 1; else gs.thread4.max_threads = 0; if (BRW_IS_IGDNG(brw)) gs.thread4.rendering_enable = 1; if (INTEL_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, key, sizeof(*key), &brw->gs.prog_bo, 1, &gs, sizeof(gs), NULL, NULL); if (key->prog_active) { /* Emit GS program relocation */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gs.thread0.grf_reg_count << 1, offsetof(struct brw_gs_unit_state, thread0), brw->gs.prog_bo); }
static void brw_set_sampler_message(struct brw_context *brw, struct brw_instruction *insn, GLuint binding_table_index, GLuint sampler, GLuint msg_type, GLuint response_length, GLuint msg_length, GLboolean eot, GLuint header_present, GLuint simd_mode) { assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); if (BRW_IS_IGDNG(brw)) { insn->bits3.sampler_igdng.binding_table_index = binding_table_index; insn->bits3.sampler_igdng.sampler = sampler; insn->bits3.sampler_igdng.msg_type = msg_type; insn->bits3.sampler_igdng.simd_mode = simd_mode; insn->bits3.sampler_igdng.header_present = header_present; insn->bits3.sampler_igdng.response_length = response_length; insn->bits3.sampler_igdng.msg_length = msg_length; insn->bits3.sampler_igdng.end_of_thread = eot; insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; insn->bits2.send_igdng.end_of_thread = eot; } else if (BRW_IS_G4X(brw)) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; insn->bits3.sampler_g4x.response_length = response_length; insn->bits3.sampler_g4x.msg_length = msg_length; insn->bits3.sampler_g4x.end_of_thread = eot; insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; } else { insn->bits3.sampler.binding_table_index = binding_table_index; insn->bits3.sampler.sampler = sampler; insn->bits3.sampler.msg_type = msg_type; insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; insn->bits3.sampler.response_length = response_length; insn->bits3.sampler.msg_length = msg_length; insn->bits3.sampler.end_of_thread = eot; insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; } }
/* Emit the ELSE of an IF/ELSE/ENDIF block and back-patch the matching
 * IF to jump here when its condition fails.
 *
 * Returns the ELSE instruction so brw_ENDIF() can patch it in turn.
 * Jump counts are in instruction units; IGDNG encodes them in 64-bit
 * halves, hence br = 2 there.
 */
struct brw_instruction *brw_ELSE(struct brw_compile *p,
				 struct brw_instruction *if_insn)
{
   struct brw_instruction *insn;
   GLuint br = 1;

   if (BRW_IS_IGDNG(p->brw))
      br = 2;

   /* In single-program-flow mode branches are emulated with IP
    * arithmetic (ADD to the IP register) instead of real ELSE.
    */
   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      /* *16 converts an instruction count to bytes (128-bit insns). */
      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
      if_insn->bits3.if_else.pop_count = 0;
      if_insn->bits3.if_else.pad0 = 0;
   }

   return insn;
}
/* Encode a SEND message descriptor targeting the URB (used for URB
 * writes from the VS/GS/clip threads).  IGDNG uses a different
 * descriptor layout and carries the target id in bits2.
 */
static void brw_set_urb_message( struct brw_context *brw,
				 struct brw_instruction *insn,
				 GLboolean allocate,
				 GLboolean used,
				 GLuint msg_length,
				 GLuint response_length,
				 GLboolean end_of_thread,
				 GLboolean complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   /* Zero the descriptor dword before filling individual bitfields. */
   brw_set_src1(insn, brw_imm_d(0));

   if (BRW_IS_IGDNG(brw)) {
      insn->bits3.urb_igdng.opcode = 0;	/* ? */
      insn->bits3.urb_igdng.offset = offset;
      insn->bits3.urb_igdng.swizzle_control = swizzle_control;
      insn->bits3.urb_igdng.allocate = allocate;
      insn->bits3.urb_igdng.used = used;	/* ? */
      insn->bits3.urb_igdng.complete = complete;
      /* IGDNG URB messages always carry a header here. */
      insn->bits3.urb_igdng.header_present = 1;
      insn->bits3.urb_igdng.response_length = response_length;
      insn->bits3.urb_igdng.msg_length = msg_length;
      insn->bits3.urb_igdng.end_of_thread = end_of_thread;
      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_igdng.end_of_thread = end_of_thread;
   } else {
      /* NOTE(review): the "?" comments are from the original author —
       * the exact semantics of `opcode` and `used` were evidently
       * unconfirmed; verify against the hardware PRM before relying on
       * them.
       */
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
/* Emit the WHILE that closes a DO/WHILE loop, jumping back to the
 * instruction after `do_insn` while the loop condition holds.
 *
 * Jump counts are in instruction units; IGDNG encodes them in 64-bit
 * halves, hence br = 2 there.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct brw_instruction *insn;
   GLuint br = 1;

   if (BRW_IS_IGDNG(p->brw))
      br = 2;

   /* Single-program-flow mode emulates the backward branch with an ADD
    * to the IP register.
    */
   if (p->single_program_flow)
      insn = next_insn(p, BRW_OPCODE_ADD);
   else
      insn = next_insn(p, BRW_OPCODE_WHILE);

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (p->single_program_flow) {
      insn->header.execution_size = BRW_EXECUTE_1;

      /* Negative byte offset back to the loop head (*16 = bytes per
       * 128-bit instruction).
       */
      insn->bits3.d = (do_insn - insn) * 16;
   } else {
      insn->header.execution_size = do_insn->header.execution_size;

      assert(do_insn->header.opcode == BRW_OPCODE_DO);

      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
      insn->bits3.if_else.pop_count = 0;
      insn->bits3.if_else.pad0 = 0;
   }

/*    insn->header.mask_control = BRW_MASK_ENABLE; */

   /* XXX: does it make sense to set this? */
/*    insn->header.mask_control = BRW_MASK_DISABLE; */

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
/* Encode a SEND message descriptor targeting the extended math unit.
 * IGDNG uses its own descriptor layout and carries the shared-function
 * id in bits2.  Math messages never terminate the thread.
 */
static void brw_set_math_message( struct brw_context *brw,
				  struct brw_instruction *insn,
				  GLuint msg_length,
				  GLuint response_length,
				  GLuint function,
				  GLuint integer_type,
				  GLboolean low_precision,
				  GLboolean saturate,
				  GLuint dataType )
{
   /* Zero the descriptor dword before filling individual bitfields. */
   brw_set_src1(insn, brw_imm_d(0));

   if (BRW_IS_IGDNG(brw)) {
      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_igdng.end_of_thread = 0;

      insn->bits3.math_igdng.msg_length = msg_length;
      insn->bits3.math_igdng.response_length = response_length;
      insn->bits3.math_igdng.header_present = 0;
      insn->bits3.math_igdng.snapshot = 0;
      insn->bits3.math_igdng.data_type = dataType;
      insn->bits3.math_igdng.saturate = saturate;
      insn->bits3.math_igdng.precision = low_precision;
      insn->bits3.math_igdng.int_type = integer_type;
      insn->bits3.math_igdng.function = function;
      insn->bits3.math_igdng.end_of_thread = 0;
   } else {
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.function = function;
      insn->bits3.math.end_of_thread = 0;
   }
}
/* Encode a SEND message descriptor targeting the dataport write unit
 * (render target / scratch writes).  IGDNG uses its own descriptor
 * layout and carries the shared-function id in bits2.
 */
static void brw_set_dp_write_message( struct brw_context *brw,
				      struct brw_instruction *insn,
				      GLuint binding_table_index,
				      GLuint msg_control,
				      GLuint msg_type,
				      GLuint msg_length,
				      GLuint pixel_scoreboard_clear,
				      GLuint response_length,
				      GLuint end_of_thread )
{
   /* Zero the descriptor dword before filling individual bitfields. */
   brw_set_src1(insn, brw_imm_d(0));

   if (BRW_IS_IGDNG(brw)) {
      insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
      insn->bits3.dp_write_igdng.msg_control = msg_control;
      insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_igdng.msg_type = msg_type;
      /* No write-commit notification requested. */
      insn->bits3.dp_write_igdng.send_commit_msg = 0;
      /* IGDNG dataport writes always carry a message header here. */
      insn->bits3.dp_write_igdng.header_present = 1;
      insn->bits3.dp_write_igdng.response_length = response_length;
      insn->bits3.dp_write_igdng.msg_length = msg_length;
      insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_igdng.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = 0;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
/* Encode a SEND message descriptor targeting the dataport read unit
 * (constant buffer / scratch reads).  The trailing comments on the
 * pre-IGDNG branch give each field's bit range in the descriptor.
 */
static void brw_set_dp_read_message( struct brw_context *brw,
				     struct brw_instruction *insn,
				     GLuint binding_table_index,
				     GLuint msg_control,
				     GLuint msg_type,
				     GLuint target_cache,
				     GLuint msg_length,
				     GLuint response_length,
				     GLuint end_of_thread )
{
   /* Zero the descriptor dword before filling individual bitfields. */
   brw_set_src1(insn, brw_imm_d(0));

   if (BRW_IS_IGDNG(brw)) {
      insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
      insn->bits3.dp_read_igdng.msg_control = msg_control;
      insn->bits3.dp_read_igdng.msg_type = msg_type;
      insn->bits3.dp_read_igdng.target_cache = target_cache;
      /* IGDNG dataport reads always carry a message header here. */
      insn->bits3.dp_read_igdng.header_present = 1;
      insn->bits3.dp_read_igdng.response_length = response_length;
      insn->bits3.dp_read_igdng.msg_length = msg_length;
      insn->bits3.dp_read_igdng.pad1 = 0;
      insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_igdng.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
   }
}
/* Interpolate between two vertices and put the result into a0.0.
 * Increment a0.0 accordingly.
 *
 * Attributes equal to the edge-flag offset are copied (or forced to 1)
 * rather than interpolated, since edge flags are not continuous.
 */
void brw_clip_interp_vertex( struct brw_clip_compile *c,
			     struct brw_indirect dest_ptr,
			     struct brw_indirect v0_ptr, /* from */
			     struct brw_indirect v1_ptr, /* to */
			     struct brw_reg t0,
			     GLboolean force_edgeflag)
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp = get_tmp(c);
   GLuint i;

   /* Just copy the vertex header:
    */
   /*
    * After CLIP stage, only first 256 bits of the VUE are read
    * back on IGDNG, so needn't change it
    */
   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);

   /* Iterate over each attribute (could be done in pairs?)
    */
   for (i = 0; i < c->nr_attrs; i++) {
      /* Byte offset of attribute i in the VUE; IGDNG has a 3x larger
       * header (32*3 bytes) before the attributes.
       */
      GLuint delta = i*16 + 32;

      if (BRW_IS_IGDNG(p->brw))
	  delta = i * 16 + 32 * 3;

      if (delta == c->offset[VERT_RESULT_EDGE]) {
	 if (force_edgeflag)
	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
	 else
	    brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
      } else {
	 /* Interpolate:
	  *
	  *        New = attr0 + t*attr1 - t*attr0
	  */
	 brw_MUL(p,
		 vec4(brw_null_reg()),
		 deref_4f(v1_ptr, delta),
		 t0);

	 brw_MAC(p,
		 tmp,
		 negate(deref_4f(v0_ptr, delta)),
		 t0);

	 brw_ADD(p,
		 deref_4f(dest_ptr, delta),
		 deref_4f(v0_ptr, delta),
		 tmp);
      }
   }

   /* With an odd attribute count, zero the padding slot after the last
    * attribute (i == c->nr_attrs after the loop).
    */
   if (i & 1) {
      GLuint delta = i*16 + 32;

      if (BRW_IS_IGDNG(p->brw))
	  delta = i * 16 + 32 * 3;

      brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
   }

   release_tmp(c, tmp);

   /* Recreate the projected (NDC) coordinate in the new vertex
    * header:
    */
   brw_clip_project_vertex(c, dest_ptr );
}
static enum pipe_error sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, struct brw_winsys_reloc *reloc, struct brw_winsys_buffer **bo_out) { struct brw_sf_unit_state sf; enum pipe_error ret; int chipset_max_threads; memset(&sf, 0, sizeof(sf)); sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* reloc */ sf.thread0.kernel_start_pointer = 0; sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; sf.thread3.dispatch_grf_start_reg = 3; if (BRW_IS_IGDNG(brw)) sf.thread3.urb_entry_read_offset = 3; else sf.thread3.urb_entry_read_offset = 1; sf.thread3.urb_entry_read_length = key->urb_entry_read_length; sf.thread4.nr_urb_entries = key->nr_urb_entries; sf.thread4.urb_entry_allocation_size = key->sfsize - 1; /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or * 48(IGDNG) threads */ if (BRW_IS_IGDNG(brw)) chipset_max_threads = 48; else chipset_max_threads = 24; sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; if (BRW_DEBUG & DEBUG_SINGLE_THREAD) sf.thread4.max_threads = 0; if (BRW_DEBUG & DEBUG_STATS) sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ /* reloc */ sf.sf5.sf_viewport_state_offset = 0; sf.sf5.viewport_transform = 1; if (key->scissor) sf.sf6.scissor = 1; if (key->front_ccw) sf.sf5.front_winding = BRW_FRONTWINDING_CCW; else sf.sf5.front_winding = BRW_FRONTWINDING_CW; switch (key->cull_face) { case PIPE_FACE_FRONT: sf.sf6.cull_mode = BRW_CULLMODE_FRONT; break; case PIPE_FACE_BACK: sf.sf6.cull_mode = BRW_CULLMODE_BACK; break; case PIPE_FACE_FRONT_AND_BACK: sf.sf6.cull_mode = BRW_CULLMODE_BOTH; break; case PIPE_FACE_NONE: sf.sf6.cull_mode = BRW_CULLMODE_NONE; break; default: assert(0); sf.sf6.cull_mode = BRW_CULLMODE_NONE; break; } /* _NEW_LINE */ /* XXX use ctx->Const.Min/MaxLineWidth here */ sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1); sf.sf6.line_endcap_aa_region_width = 1; if (key->line_smooth) sf.sf6.aa_enable = 1; else if (sf.sf6.line_width 
<= 0x2) sf.sf6.line_width = 0; /* XXX: gl_rasterization_rules? something else? */ sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; sf.sf6.point_rast_rule = 1; /* XXX clamp max depends on AA vs. non-AA */ /* _NEW_POINT */ sf.sf7.sprite_point = key->point_sprite; sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); sf.sf7.use_point_size_state = !key->point_attenuated; sf.sf7.aa_line_distance_mode = 0; /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ if (!key->flatshade_first) { sf.sf7.trifan_pv = 2; sf.sf7.linestrip_pv = 1; sf.sf7.tristrip_pv = 2; } else { sf.sf7.trifan_pv = 1; sf.sf7.linestrip_pv = 0; sf.sf7.tristrip_pv = 0; } sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable; /* Set bias for OpenGL rasterization rules: */ if (key->gl_rasterization_rules) { sf.sf6.dest_org_vbias = 0x8; sf.sf6.dest_org_hbias = 0x8; } else { sf.sf6.dest_org_vbias = 0x0; sf.sf6.dest_org_hbias = 0x0; } ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), reloc, 2, &sf, sizeof(sf), NULL, NULL, bo_out); if (ret) return ret; return PIPE_OK; }
static void brw_translate_vertex_elements(struct brw_context *brw, struct brw_vertex_element_packet *brw_velems, const struct pipe_vertex_element *attribs, unsigned count) { unsigned i; /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ brw_velems->header.opcode = CMD_VERTEX_ELEMENT; if (count == 0) { brw_velems->header.length = 1; brw_velems->ve[0].ve0.src_offset = 0; brw_velems->ve[0].ve0.src_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; brw_velems->ve[0].ve0.valid = 1; brw_velems->ve[0].ve0.vertex_buffer_index = 0; brw_velems->ve[0].ve1.dst_offset = 0; brw_velems->ve[0].ve1.vfcomponent0 = BRW_VE1_COMPONENT_STORE_0; brw_velems->ve[0].ve1.vfcomponent1 = BRW_VE1_COMPONENT_STORE_0; brw_velems->ve[0].ve1.vfcomponent2 = BRW_VE1_COMPONENT_STORE_0; brw_velems->ve[0].ve1.vfcomponent3 = BRW_VE1_COMPONENT_STORE_1_FLT; return; } /* Now emit vertex element (VEP) state packets. 
* */ brw_velems->header.length = (1 + count * 2) - 2; for (i = 0; i < count; i++) { const struct pipe_vertex_element *input = &attribs[i]; unsigned nr_components = util_format_get_nr_components(input->src_format); uint32_t format = brw_translate_surface_format( input->src_format ); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; switch (nr_components) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; break; } brw_velems->ve[i].ve0.src_offset = input->src_offset; brw_velems->ve[i].ve0.src_format = format; brw_velems->ve[i].ve0.valid = 1; brw_velems->ve[i].ve0.vertex_buffer_index = input->vertex_buffer_index; brw_velems->ve[i].ve1.vfcomponent0 = comp0; brw_velems->ve[i].ve1.vfcomponent1 = comp1; brw_velems->ve[i].ve1.vfcomponent2 = comp2; brw_velems->ve[i].ve1.vfcomponent3 = comp3; if (BRW_IS_IGDNG(brw)) brw_velems->ve[i].ve1.dst_offset = 0; else brw_velems->ve[i].ve1.dst_offset = i * 4; } }
/* Emit state that never changes for the lifetime of the context:
 * pipeline select, depth-offset clamp, system instruction pointer,
 * VF statistics, AA line parameters and polygon stipple offset.
 */
static int upload_invarient_state( struct brw_context *brw )
{
   {
      /* 0x61040000  Pipeline Select */
      /*     PipelineSelect            : 0 */
      struct brw_pipeline_select ps;

      memset(&ps, 0, sizeof(ps));
      /* G4X/IGDNG use a different pipeline-select opcode. */
      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
	 ps.header.opcode = CMD_PIPELINE_SELECT_GM45;
      else
	 ps.header.opcode = CMD_PIPELINE_SELECT_965;
      /* 0 selects the 3D pipeline. */
      ps.header.pipeline_select = 0;
      BRW_BATCH_STRUCT(brw, &ps);
   }

   {
      struct brw_global_depth_offset_clamp gdo;
      memset(&gdo, 0, sizeof(gdo));

      /* Disable depth offset clamping.
       */
      gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
      gdo.header.length = sizeof(gdo)/4 - 2;
      gdo.depth_offset_clamp = 0.0;

      BRW_BATCH_STRUCT(brw, &gdo);
   }

   /* 0x61020000  State Instruction Pointer */
   {
      struct brw_system_instruction_pointer sip;
      memset(&sip, 0, sizeof(sip));

      sip.header.opcode = CMD_STATE_INSN_POINTER;
      sip.header.length = 0;
      sip.bits0.pad = 0;
      sip.bits0.system_instruction_pointer = 0;

      BRW_BATCH_STRUCT(brw, &sip);
   }

   /* VF Statistics */
   {
      struct brw_vf_statistics vfs;
      memset(&vfs, 0, sizeof(vfs));

      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
	 vfs.opcode = CMD_VF_STATISTICS_GM45;
      else
	 vfs.opcode = CMD_VF_STATISTICS_965;

      if (BRW_DEBUG & DEBUG_STATS)
	 vfs.statistics_enable = 1;

      BRW_BATCH_STRUCT(brw, &vfs);
   }

   /* 965 lacks this packet; emit it on everything newer. */
   if (!BRW_IS_965(brw))
   {
      struct brw_aa_line_parameters balp;

      /* use legacy aa line coverage computation */
      memset(&balp, 0, sizeof(balp));
      balp.header.opcode = CMD_AA_LINE_PARAMETERS;
      balp.header.length = sizeof(balp) / 4 - 2;

      BRW_BATCH_STRUCT(brw, &balp);
   }

   {
      struct brw_polygon_stipple_offset bpso;

      /* This is invarient state in gallium:
       */
      memset(&bpso, 0, sizeof(bpso));
      bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
      bpso.header.length = sizeof(bpso)/4-2;
      bpso.bits0.y_offset = 0;
      bpso.bits0.x_offset = 0;

      BRW_BATCH_STRUCT(brw, &bpso);
   }

   return 0;
}
/* Emit 3DSTATE_DEPTH_BUFFER for the current framebuffer's zs surface,
 * or a null depth buffer when none is bound.  G4X/IGDNG use a 6-dword
 * packet (extra trailing dword); older parts use 5.
 */
static int emit_depthbuffer(struct brw_context *brw)
{
   struct pipe_surface *surface = brw->curr.fb.zsbuf;
   unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;

   if (surface == NULL) {
      /* Emit a null depth buffer. */
      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
		(BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
	 OUT_BATCH(0);

      ADVANCE_BATCH();
   } else {
      struct brw_winsys_buffer *bo;
      unsigned int format;
      unsigned int pitch;
      unsigned int cpp;

      /* Map the pipe format to hardware depth format + bytes/pixel. */
      switch (surface->format) {
      case PIPE_FORMAT_Z16_UNORM:
	 format = BRW_DEPTHFORMAT_D16_UNORM;
	 cpp = 2;
	 break;
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z24S8_UNORM:
	 format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
	 cpp = 4;
	 break;
      case PIPE_FORMAT_Z32_FLOAT:
	 format = BRW_DEPTHFORMAT_D32_FLOAT;
	 cpp = 4;
	 break;
      default:
	 assert(0);
	 return PIPE_ERROR_BAD_INPUT;
      }

      bo = brw_surface(surface)->bo;
      pitch = brw_surface(surface)->pitch;

      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
      /* Pitch field is in bytes minus one; bit 27 flags tiling. */
      OUT_BATCH(((pitch * cpp) - 1) |
		(format << 18) |
		(BRW_TILEWALK_YMAJOR << 26) |
		((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |
		(BRW_SURFACE_2D << 29));
      OUT_RELOC(bo,
		BRW_USAGE_DEPTH_BUFFER,
		surface->offset);
      /* NOTE(review): width is taken from `pitch` (pixels) here rather
       * than surface->width — confirm this is intentional for padded
       * surfaces.
       */
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
		((pitch - 1) << 6) |
		((surface->height - 1) << 19));
      OUT_BATCH(0);

      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
	 OUT_BATCH(0);

      ADVANCE_BATCH();
   }

   return 0;
}
/* Build hardware clip unit state from a state key and upload it via
 * the brw state cache.  The kernel pointer is left zero and filled in
 * by the supplied relocation.
 */
static enum pipe_error
clip_unit_create_from_key(struct brw_context *brw,
                          struct brw_clip_unit_key *key,
                          struct brw_winsys_reloc *reloc,
                          struct brw_winsys_buffer **bo_out)
{
   struct brw_clip_unit_state clip;
   enum pipe_error ret;

   memset(&clip, 0, sizeof(clip));

   clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
   /* reloc */
   clip.thread0.kernel_start_pointer = 0;

   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
   clip.thread1.single_program_flow = 1;

   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
   clip.thread3.dispatch_grf_start_reg = 1;
   clip.thread3.urb_entry_read_offset = 0;

   clip.thread4.nr_urb_entries = key->nr_urb_entries;
   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;

   /* If we have enough clip URB entries to run two threads, do so.
    */
   if (key->nr_urb_entries >= 10) {
      /* Half of the URB entries go to each thread, and it has to be an
       * even number.
       */
      assert(key->nr_urb_entries % 2 == 0);

      /* Although up to 16 concurrent Clip threads are allowed on IGDNG,
       * only 2 threads can output VUEs at a time.
       */
      if (BRW_IS_IGDNG(brw))
         clip.thread4.max_threads = 16 - 1;
      else
         clip.thread4.max_threads = 2 - 1;
   } else {
      assert(key->nr_urb_entries >= 5);
      clip.thread4.max_threads = 1 - 1;
   }

   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
      clip.thread4.max_threads = 0;

   if (BRW_DEBUG & DEBUG_STATS)
      clip.thread4.stats_enable = 1;

   /* Enable all user clip planes; the kernel masks them as needed. */
   clip.clip5.userclip_enable_flags = 0x7f;
   clip.clip5.userclip_must_clip = 1;
   clip.clip5.guard_band_enable = 0;
   if (!key->depth_clamp)
      clip.clip5.viewport_z_clip_enable = 1;
   clip.clip5.viewport_xy_clip_enable = 1;
   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
   clip.clip5.api_mode = BRW_CLIP_API_OGL;
   clip.clip5.clip_mode = key->clip_mode;

   if (BRW_IS_G4X(brw))
      clip.clip5.negative_w_clip_test = 1;

   clip.clip6.clipper_viewport_state_ptr = 0;

   /* Full NDC viewport; actual viewport transform happens in SF. */
   clip.viewport_xmin = -1;
   clip.viewport_xmax = 1;
   clip.viewport_ymin = -1;
   clip.viewport_ymax = 1;

   ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
                          key, sizeof(*key),
                          reloc, 1,
                          &clip, sizeof(clip),
                          NULL, NULL,
                          bo_out);
   if (ret)
      return ret;

   return PIPE_OK;
}
/* Statically assign GRF registers for the clip-triangle kernel.
 *
 * Register usage is fixed for the whole program, so the allocation is
 * precomputed here by walking an incrementing register index `i`.
 * The order of allocations below is significant — it defines the
 * kernel's payload layout.
 */
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
			      GLuint nr_verts )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;


   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   /* With an odd attribute count, zero the padding slot after the last
    * attribute in each vertex (IGDNG has a 3x larger VUE header).
    */
   if (c->nr_attrs & 1) {
      for (j = 0; j < 3; j++) {
	 GLuint delta = c->nr_attrs*16 + 32;

	 if (BRW_IS_IGDNG(c->func.brw))
	     delta = c->nr_attrs * 16 + 32 * 3;

	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   /* Scratch registers for the clipping loop. */
   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   /* Without user clip planes, fixed_planes gets its own register
    * instead of living in the CURBE.
    */
   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0);
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (c->need_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}