/** * Allocate registers for GS.
 *
 * If sol_program is true, then:
 *
 * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF
 *   1 needs to be set aside to hold the streamed vertex buffer indices.
 *
 * - The thread will need to use the destination_indices register.
 */
static void brw_gs_alloc_regs( struct brw_gs_compile *c,
                               GLuint nr_verts,
                               bool sol_program )
{
   GLuint reg = 0, vert;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
   reg++;

   /* Streamed vertex buffer indices */
   if (sol_program)
      c->reg.SVBI = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UD);

   /* Payload vertices plus space for more generated vertices:
    */
   for (vert = 0; vert < nr_verts; vert++) {
      c->reg.vertex[vert] = brw_vec4_grf(reg, 0);
      reg += c->nr_regs;
   }

   /* Scratch registers used while emitting vertices. */
   c->reg.header = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UD);
   c->reg.temp = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UD);

   if (sol_program) {
      c->reg.destination_indices =
         retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_UD);
   }

   c->prog_data.urb_read_length = c->nr_regs;
   c->prog_data.total_grf = reg;
}
void brw_NOP(struct brw_compile *p) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_ud(0x0)); }
/**
 * Statically assign the GRF registers used by the line-clipping kernel.
 *
 * Layout, in allocation order: the r0 thread header, the user clip planes
 * (pushed via the CURBE when present), four vertex slots, the packed
 * interpolation/plane scalars, the dot products, the source masks, and on
 * Gen5 an ff_sync register.  Temporaries start at first_tmp.
 */
static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
{
   const struct gen_device_info *devinfo = c->func.devinfo;
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      /* Six fixed planes plus the user planes, packed two per GRF
       * (hence the +1 round-up before dividing by 2).
       */
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;

   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < 4; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   /* Interpolation parameters and plane data share a single GRF. */
   c->reg.t = brw_vec1_grf(i, 0);
   c->reg.t0 = brw_vec1_grf(i, 1);
   c->reg.t1 = brw_vec1_grf(i, 2);
   c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp1 = brw_vec1_grf(i, 4);
   i++;

   if (!c->key.nr_userclip) {
      /* No CURBE planes were pushed; hold the fixed planes in a GRF. */
      c->reg.fixed_planes = brw_vec8_grf(i, 0);
      i++;
   }

   c->reg.vertex_src_mask = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
   c->reg.clipdistance_offset = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_W);
   i++;

   if (devinfo->gen == 5) {
      /* Gen5 needs an ff_sync register for URB allocation handshaking. */
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   /* Everything past here is handed out dynamically by get_tmp(). */
   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
/**
 * Close an IF/ELSE construct.
 *
 * In single-program-flow mode no ENDIF instruction is emitted: the earlier
 * patch instruction (an ADD to the IP register) just gets its byte distance
 * to the next instruction filled in.  Otherwise a real ENDIF is emitted and
 * the pending IF/ELSE is patched to jump to it.
 */
void brw_ENDIF(struct brw_compile *p,
               struct brw_instruction *patch_insn)
{
   /* Jump counts are expressed in units twice as large on IGDNG. */
   GLuint br = 1;

   if (BRW_IS_IGDNG(p->brw))
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      /* Distance in bytes: each instruction is 16 bytes. */
      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(insn, brw_imm_d(0x0));

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      /* The jump count must not have been patched already. */
      assert(patch_insn->bits3.if_else.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
         /* Automagically turn it into an IFF:
          */
         patch_insn->header.opcode = BRW_OPCODE_IFF;
         patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
         patch_insn->bits3.if_else.pop_count = 0;
         patch_insn->bits3.if_else.pad0 = 0;
      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
         patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
         patch_insn->bits3.if_else.pop_count = 1;
         patch_insn->bits3.if_else.pad0 = 0;
      } else {
         assert(0);
      }

      /* Also pop item off the stack in the endif instruction:
       */
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   }
}
/** * Determine the register corresponding to the given vue slot
 */
static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
                                   struct brw_reg vert,
                                   int vue_slot)
{
   /* Two VUE slots are packed per GRF; the read offset shifts the base. */
   const GLuint grf_off = vue_slot / 2 - c->urb_entry_read_offset;
   const GLuint half = vue_slot % 2;
   return brw_vec4_grf(vert.nr + grf_off, half * 4);
}
static struct brw_reg get_tmp( struct brw_clip_compile *c ) { struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); if (++c->last_tmp > c->prog_data.total_grf) c->prog_data.total_grf = c->last_tmp; return tmp; }
/* Map an attribute index to the GRF/subregister holding it in a vertex. */
static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
                                    struct brw_reg vert,
                                    GLuint attr)
{
   /* Attributes are packed two per GRF. */
   const GLuint idx = c->attr_to_idx[attr];
   return brw_vec4_grf(vert.nr + idx / 2, (idx % 2) * 4);
}
/** * Determine the register corresponding to the given vert_result.
 */
static struct brw_reg get_vert_result(struct brw_sf_compile *c,
                                      struct brw_reg vert,
                                      GLuint vert_result)
{
   int vue_slot = c->vue_map.vert_result_to_slot[vert_result];

   /* The slot must lie within the URB range this thread reads. */
   assert (vue_slot >= c->urb_entry_read_offset);

   /* Two slots per GRF; pick the right half of the register. */
   return brw_vec4_grf(vert.nr + (vue_slot / 2 - c->urb_entry_read_offset),
                       (vue_slot % 2) * 4);
}
/**
 * Statically assign GRF registers for the constant-color clear program:
 * r0/r1 hold the thread payload, followed by the pushed clear color.
 */
void brw_blorp_const_color_program::alloc_regs()
{
   int reg = 0;
   this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
   this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);

   /* Push constants begin immediately after the fixed payload. */
   prog_data.first_curbe_grf = reg;
   clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
   /* NOTE(review): reg was already advanced past clear_rgba above; if
    * BRW_BLORP_NUM_PUSH_CONST_REGS also counts that register, this
    * over-reserves one GRF — confirm against the constant's definition.
    */
   reg += BRW_BLORP_NUM_PUSH_CONST_REGS;

   /* Make sure we didn't run out of registers */
   assert(reg <= GEN7_MRF_HACK_START);

   this->base_mrf = 2;
}
/* Statically assign GRF registers for the GS: r0 header followed by one
 * c->nr_regs-sized slot per vertex.
 */
static void brw_gs_alloc_regs( struct brw_gs_compile *c,
                               GLuint nr_verts )
{
   GLuint reg = 0, vert;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
   reg++;

   /* Payload vertices plus space for more generated vertices:
    */
   for (vert = 0; vert < nr_verts; vert++) {
      c->reg.vertex[vert] = brw_vec4_grf(reg, 0);
      reg += c->nr_regs;
   }

   c->prog_data.urb_read_length = c->nr_regs;
   c->prog_data.total_grf = reg;
}
void vec4_gs_visitor::emit_prolog() { /* In vertex shaders, r0.2 is guaranteed to be initialized to zero. In * geometry shaders, it isn't (it contains a bunch of information we don't * need, like the input primitive type). We need r0.2 to be zero in order * to build scratch read/write messages correctly (otherwise this value * will be interpreted as a global offset, causing us to do our scratch * reads/writes to garbage memory). So just set it to zero at the top of * the shader. */ this->current_annotation = "clear r0.2"; dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u)); inst->force_writemask_all = true; /* Create a virtual register to hold the vertex count */ this->vertex_count = src_reg(this, glsl_type::uint_type); /* Initialize the vertex_count register to 0 */ this->current_annotation = "initialize vertex_count"; inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u))); inst->force_writemask_all = true; if (c->control_data_header_size_bits > 0) { /* Create a virtual register to hold the current set of control data * bits. */ this->control_data_bits = src_reg(this, glsl_type::uint_type); /* If we're outputting more than 32 control data bits, then EmitVertex() * will set control_data_bits to 0 after emitting the first vertex. * Otherwise, we need to initialize it to 0 here. */ if (c->control_data_header_size_bits <= 32) { this->current_annotation = "initialize control data bits"; inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u))); inst->force_writemask_all = true; } } this->current_annotation = NULL; }
/**
 * Translate a NIR intrinsic into vec4 GS IR.  Only GS-specific intrinsics
 * are handled here; anything else falls through to the base vec4 visitor.
 */
void vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   dst_reg dest;
   src_reg src;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input: {
      /* The EmitNoIndirectInput flag guarantees our vertex index will
       * be constant.  We should handle indirects someday.
       */
      nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
      nir_const_value *offset = nir_src_as_const_value(instr->src[1]);

      /* Make up a type...we have no way of knowing... */
      const glsl_type *const type = glsl_type::ivec(instr->num_components);

      /* Inputs for all vertices are laid out consecutively, one
       * BRW_VARYING_SLOT_COUNT-sized region per vertex.
       */
      src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
                    instr->const_index[0] + offset->u32[0],
                    type);
      /* gl_PointSize is passed in the .w component of the VUE header */
      if (instr->const_index[0] == VARYING_SLOT_PSIZ)
         src.swizzle = BRW_SWIZZLE_WWWW;

      dest = get_nir_dest(instr->dest, src.type);
      dest.writemask = brw_writemask_for_size(instr->num_components);
      emit(MOV(dest, src));
      break;
   }

   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should have produced per_vertex intrinsics");

   case nir_intrinsic_emit_vertex_with_counter: {
      /* The counter source carries the running vertex count. */
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      int stream_id = instr->const_index[0];
      gs_emit_vertex(stream_id);
      break;
   }

   case nir_intrinsic_end_primitive_with_counter:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      gs_end_primitive();
      break;

   case nir_intrinsic_set_vertex_count:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      break;

   case nir_intrinsic_load_primitive_id:
      assert(gs_prog_data->include_primitive_id);
      /* The primitive ID is delivered in r1 of the thread payload. */
      dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
      break;

   case nir_intrinsic_load_invocation_id: {
      src_reg invocation_id =
         src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
      assert(invocation_id.file != BAD_FILE);
      dest = get_nir_dest(instr->dest, invocation_id.type);
      emit(MOV(dest, invocation_id));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
/**
 * Statically assign the GRF registers used by the triangle-clipping kernel.
 *
 * Allocation order: r0 header, user clip planes (via CURBE when present),
 * payload/generated vertex slots, packed loop/plane scalars, dot products,
 * the in/out/free vertex lists, and optional unfilled-mode and ff_sync
 * registers.  Temporaries are handed out after the static registers.
 */
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
                              GLuint nr_verts )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      /* Six fixed planes plus user planes, packed two per GRF. */
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;

   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->key.nr_attrs & 1) {
      /* Odd attribute count: the last GRF of each vertex is only half
       * used, so zero the unused half of all three input vertices.
       */
      for (j = 0; j < 3; j++) {
         GLuint delta = c->key.nr_attrs*16 + 32;

         /* IGDNG has a larger VUE header (hence the bigger offset). */
         if (c->chipset.is_igdng)
            delta = c->key.nr_attrs * 16 + 32 * 3;

         brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   /* Loop bookkeeping and plane data share one GRF. */
   c->reg.t = brw_vec1_grf(i, 0);
   c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp = brw_vec1_grf(i, 4);
   i++;

   /* Vertex index lists used while clipping the polygon. */
   c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      /* No CURBE planes were pushed; hold the fixed planes in a GRF. */
      c->reg.fixed_planes = brw_vec8_grf(i, 0);
      i++;
   }

   if (c->key.do_unfilled) {
      /* Extra registers for unfilled (wireframe/point) polygon mode. */
      c->reg.dir = brw_vec4_grf(i, 0);
      c->reg.offset = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0 = brw_vec4_grf(i, 0);
      c->reg.tmp1 = brw_vec4_grf(i, 4);
      i++;
   }

   if (c->need_ff_sync) {
      /* ff_sync register for URB allocation handshaking. */
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   /* Everything past here is handed out dynamically as temporaries. */
   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
/**
 * Statically assign the GRF registers used by the triangle-clipping kernel.
 *
 * Allocation order: r0 header, user clip planes (via CURBE when present),
 * payload/generated vertex slots, packed loop/plane scalars, dot products,
 * the in/out/free vertex lists, and optional unfilled-mode and ff_sync
 * registers.  Temporaries are handed out after the static registers.
 */
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
                              GLuint nr_verts )
{
   struct intel_context *intel = &c->func.brw->intel;
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      /* Six fixed planes plus user planes, packed two per GRF. */
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;

   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->vue_map.num_slots % 2) {
      /* The VUE has an odd number of slots so the last register is only half
       * used.  Fill the second half with zero.
       */
      for (j = 0; j < 3; j++) {
         GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);

         brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   /* Loop bookkeeping and plane data share one GRF. */
   c->reg.t = brw_vec1_grf(i, 0);
   c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp = brw_vec1_grf(i, 4);
   i++;

   /* Vertex index lists used while clipping the polygon. */
   c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      /* No CURBE planes were pushed; hold the fixed planes in a GRF. */
      c->reg.fixed_planes = brw_vec8_grf(i, 0);
      i++;
   }

   if (c->key.do_unfilled) {
      /* Extra registers for unfilled (wireframe/point) polygon mode. */
      c->reg.dir = brw_vec4_grf(i, 0);
      c->reg.offset = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0 = brw_vec4_grf(i, 0);
      c->reg.tmp1 = brw_vec4_grf(i, 4);
      i++;
   }

   if (intel->needs_ff_sync) {
      /* ff_sync register for URB allocation handshaking. */
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   /* Everything past here is handed out dynamically as temporaries. */
   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
/**
 * Emit GS prolog code: clear r0.2, zero-initialize the vertex counter and
 * (when small enough) the control data bits, and re-swizzle gl_PointSize
 * inputs from the .w component where the VS stored them.
 */
void vec4_gs_visitor::emit_prolog()
{
   /* In vertex shaders, r0.2 is guaranteed to be initialized to zero. In
    * geometry shaders, it isn't (it contains a bunch of information we don't
    * need, like the input primitive type). We need r0.2 to be zero in order
    * to build scratch read/write messages correctly (otherwise this value
    * will be interpreted as a global offset, causing us to do our scratch
    * reads/writes to garbage memory). So just set it to zero at the top of
    * the shader.
    */
   this->current_annotation = "clear r0.2";
   dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2_IMMED, r0, 0u);
   inst->force_writemask_all = true;

   /* Create a virtual register to hold the vertex count */
   this->vertex_count = src_reg(this, glsl_type::uint_type);

   /* Initialize the vertex_count register to 0 */
   this->current_annotation = "initialize vertex_count";
   inst = emit(MOV(dst_reg(this->vertex_count), 0u));
   inst->force_writemask_all = true;

   if (c->control_data_header_size_bits > 0) {
      /* Create a virtual register to hold the current set of control data
       * bits.
       */
      this->control_data_bits = src_reg(this, glsl_type::uint_type);

      /* If we're outputting more than 32 control data bits, then EmitVertex()
       * will set control_data_bits to 0 after emitting the first vertex.
       * Otherwise, we need to initialize it to 0 here.
       */
      if (c->control_data_header_size_bits <= 32) {
         this->current_annotation = "initialize control data bits";
         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
         inst->force_writemask_all = true;
      }
   }

   /* If the geometry shader uses the gl_PointSize input, we need to fix it up
    * to account for the fact that the vertex shader stored it in the w
    * component of VARYING_SLOT_PSIZ.
    */
   if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) {
      this->current_annotation = "swizzle gl_PointSize input";
      for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) {
         dst_reg dst(ATTR,
                     BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
         dst.type = BRW_REGISTER_TYPE_F;
         src_reg src(dst);
         dst.writemask = WRITEMASK_X;
         src.swizzle = BRW_SWIZZLE_WWWW;
         inst = emit(MOV(dst, src));

         /* In dual instanced dispatch mode, dst has a width of 4, so we need
          * to make sure the MOV happens regardless of which channels are
          * enabled.
          */
         inst->force_writemask_all = true;
      }
   }

   this->current_annotation = NULL;
}