static void brw_wm_xy(struct brw_compile *p, int dw) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = __retype_uw(r1); struct brw_reg x_uw, y_uw; brw_set_compression_control(p, BRW_COMPRESSION_NONE); if (dw == 16) { x_uw = brw_uw16_grf(30, 0); y_uw = brw_uw16_grf(28, 0); } else { x_uw = brw_uw8_grf(30, 0); y_uw = brw_uw8_grf(28, 0); } brw_ADD(p, x_uw, __stride(__suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); brw_ADD(p, y_uw, __stride(__suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1)); brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); }
static void emit_pixel_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw,5), 2, 4, 0), brw_imm_v(0x11001100)); } brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); }
static void emit_pixel_xy(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); struct brw_reg dst0, dst1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; dst0 = get_dst_reg(c, inst, 0, 1); dst1 = get_dst_reg(c, inst, 1, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } }
/** * Computes the screen-space x,y position of the pixels. * * This will be used by emit_delta_xy() or emit_wpos_xy() for * interpolation of attributes.. * * Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, * corresponding to each of the 16 execution channels. * R0.1..8 -- ? * R1.0 -- triangle vertex 0.X * R1.1 -- triangle vertex 0.Y * R1.2 -- tile 0 x,y coords (2 packed uwords) * R1.3 -- tile 1 x,y coords (2 packed uwords) * R1.4 -- tile 2 x,y coords (2 packed uwords) * R1.5 -- tile 3 x,y coords (2 packed uwords) * R1.6 -- ? * R1.7 -- ? * R1.8 -- ? */ void emit_pixel_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask) { struct brw_compile *p = &c->func; struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); struct brw_reg dst0_uw, dst1_uw; brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); if (c->dispatch_width == 16) { dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)); dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)); } else { dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW)); dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW)); } /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, dst0_uw, stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, dst1_uw, stride(suboffset(r1_uw,5), 2, 4, 0), brw_imm_v(0x11001100)); } brw_pop_insn_state(p); }
/** * Computes the screen-space x,y distance of the pixels from the start * vertex. * * This will be used in linterp or pinterp with the start vertex value * and the Cx, Cy, and C0 coefficients passed in from the setup engine * to produce interpolated attribute values. */ void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0) { struct intel_context *intel = &p->brw->intel; struct brw_reg r1 = brw_vec1_grf(1, 0); if (mask == 0) return; assert(mask == WRITEMASK_XY); if (intel->gen >= 6) { /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1. Just add them with 0.0 for dst reg.. */ r1 = brw_imm_v(0x00000000); brw_ADD(p, dst[0], retype(arg0[0], BRW_REGISTER_TYPE_UW), r1); brw_ADD(p, dst[1], retype(arg0[1], BRW_REGISTER_TYPE_UW), r1); return; } /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers produced by emit_pixel_xy(). */ brw_ADD(p, dst[0], retype(arg0[0], BRW_REGISTER_TYPE_UW), negate(r1)); brw_ADD(p, dst[1], retype(arg0[1], BRW_REGISTER_TYPE_UW), negate(suboffset(r1,1))); }
/** * Generate the geometry shader program used on Gen6 to perform stream output * (transform feedback). */ void gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key, unsigned num_verts, bool check_edge_flags) { struct brw_compile *p = &c->func; c->prog_data.svbi_postincrement_value = num_verts; brw_gs_alloc_regs(c, num_verts, true); brw_gs_initialize_header(c); if (key->num_transform_feedback_bindings > 0) { unsigned vertex, binding; struct brw_reg destination_indices_uw = vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); /* Note: since we use the binding table to keep track of buffer offsets * and stride, the GS doesn't need to keep track of a separate pointer * into each buffer; it uses a single pointer which increments by 1 for * each vertex. So we use SVBI0 for this pointer, regardless of whether * transform feedback is in interleaved or separate attribs mode. * * Make sure that the buffers have enough room for all the vertices. */ brw_ADD(p, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.SVBI, 4)); brw_IF(p, BRW_EXECUTE_1); /* Compute the destination indices to write to. Usually we use SVBI[0] * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the * vertices come down the pipeline in reversed winding order, so we need * to flip the order when writing to the transform feedback buffer. To * ensure that flatshading accuracy is preserved, we need to write them * in order SVBI[0] + (0, 2, 1) if we're using the first provoking * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using * the last provoking vertex convention. * * Note: since brw_imm_v can only be used in instructions in * packed-word execution mode, and SVBI is a double-word, we need to * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), * or (1, 0, 2)) to the destination_indices register, and then add SVBI * using a separate instruction. Also, since the immediate constant is * expressed as packed words, and we need to load double-words into * destination_indices, we need to intersperse zeros to fill the upper * halves of each double-word. */ brw_MOV(p, destination_indices_uw, brw_imm_v(0x00020100)); /* (0, 1, 2) */ if (num_verts == 3) { /* Get primitive type into temp register. */ brw_AND(p, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as * an 8-wide comparison so that the conditional MOV that follows * moves all 8 words correctly. */ brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, get_element_ud(c->reg.temp, 0), brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); /* If so, then overwrite destination_indices_uw with the appropriate * reordering. */ brw_MOV(p, destination_indices_uw, brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ : 0x00020001)); /* (1, 0, 2) */ brw_set_predicate_control(p, BRW_PREDICATE_NONE); } brw_ADD(p, c->reg.destination_indices, c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); /* For each vertex, generate code to output each varying using the * appropriate binding table entry. */ for (vertex = 0; vertex < num_verts; ++vertex) { /* Set up the correct destination index for this vertex */ brw_MOV(p, get_element_ud(c->reg.header, 5), get_element_ud(c->reg.destination_indices, vertex)); for (binding = 0; binding < key->num_transform_feedback_bindings; ++binding) { unsigned char varying = key->transform_feedback_bindings[binding]; unsigned char slot = c->vue_map.varying_to_slot[varying]; /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: * * "Prior to End of Thread with a URB_WRITE, the kernel must * ensure that all writes are complete by sending the final * write as a committed write." */ bool final_write = binding == key->num_transform_feedback_bindings - 1 && vertex == num_verts - 1; struct brw_reg vertex_slot = c->reg.vertex[vertex]; vertex_slot.nr += slot / 2; vertex_slot.subnr = (slot % 2) * 16; /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; brw_set_access_mode(p, BRW_ALIGN_16); brw_MOV(p, stride(c->reg.header, 4, 4, 1), retype(vertex_slot, BRW_REGISTER_TYPE_UD)); brw_set_access_mode(p, BRW_ALIGN_1); brw_svb_write(p, final_write ? c->reg.temp : brw_null_reg(), /* dest */ 1, /* msg_reg_nr */ c->reg.header, /* src0 */ SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */ final_write); /* send_commit_msg */ } } brw_ENDIF(p); /* Now, reinitialize the header register from R0 to restore the parts of * the register that we overwrote while streaming out transform feedback * data. */ brw_gs_initialize_header(c); /* Finally, wait for the write commit to occur so that we can proceed to * other things safely. * * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: * * The write commit does not modify the destination register, but * merely clears the dependency associated with the destination * register. Thus, a simple “mov” instruction using the register as a * source is sufficient to wait for the write commit to occur. */ brw_MOV(p, c->reg.temp, c->reg.temp); } brw_gs_ff_sync(c, 1); /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so * release the URB that was just allocated, and terminate the thread. */ if (key->rasterizer_discard) { brw_gs_terminate(c); return; } brw_gs_overwrite_header_dw2_from_r0(c); switch (num_verts) { case 1: brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); brw_gs_emit_vue(c, c->reg.vertex[0], true); break; case 2: brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); brw_gs_emit_vue(c, c->reg.vertex[0], false); brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); brw_gs_emit_vue(c, c->reg.vertex[1], true); break; case 3: if (check_edge_flags) { /* Only emit vertices 0 and 1 if this is the first triangle of the * polygon. Otherwise they are redundant. */ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), get_element_ud(c->reg.R0, 2), brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); brw_IF(p, BRW_EXECUTE_1); } brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); brw_gs_emit_vue(c, c->reg.vertex[0], false); brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); brw_gs_emit_vue(c, c->reg.vertex[1], false); if (check_edge_flags) { brw_ENDIF(p); /* Only emit vertex 2 in PRIM_END mode if this is the last triangle * of the polygon. Otherwise leave the primitive incomplete because * there are more polygon vertices coming. */ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), get_element_ud(c->reg.R0, 2), brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); } brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_gs_emit_vue(c, c->reg.vertex[2], true); break; } }
void gen6_gs_visitor::xfb_write() { unsigned num_verts; struct brw_gs_prog_data *prog_data = (struct brw_gs_prog_data *) &c->prog_data; if (!prog_data->num_transform_feedback_bindings) return; switch (c->prog_data.output_topology) { case _3DPRIM_POINTLIST: num_verts = 1; break; case _3DPRIM_LINELIST: case _3DPRIM_LINESTRIP: case _3DPRIM_LINELOOP: num_verts = 2; break; case _3DPRIM_TRILIST: case _3DPRIM_TRIFAN: case _3DPRIM_TRISTRIP: case _3DPRIM_RECTLIST: num_verts = 3; break; case _3DPRIM_QUADLIST: case _3DPRIM_QUADSTRIP: case _3DPRIM_POLYGON: num_verts = 3; break; default: unreachable("Unexpected primitive type in Gen6 SOL program."); } this->current_annotation = "gen6 thread end: svb writes init"; emit(MOV(dst_reg(this->vertex_output_offset), 0u)); emit(MOV(dst_reg(this->sol_prim_written), 0u)); /* Check that at least one primitive can be written * * Note: since we use the binding table to keep track of buffer offsets * and stride, the GS doesn't need to keep track of a separate pointer * into each buffer; it uses a single pointer which increments by 1 for * each vertex. So we use SVBI0 for this pointer, regardless of whether * transform feedback is in interleaved or separate attribs mode. */ src_reg sol_temp(this, glsl_type::uvec4_type); emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts))); /* Compare SVBI calculated number with the maximum value, which is * in R1.4 (previously saved in this->max_svbi) for gen6. */ emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE)); emit(IF(BRW_PREDICATE_NORMAL)); { src_reg destination_indices_uw = retype(destination_indices, BRW_REGISTER_TYPE_UW); vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw), brw_imm_v(0x00020100))); /* (0, 1, 2) */ inst->force_writemask_all = true; emit(ADD(dst_reg(this->destination_indices), this->destination_indices, this->svbi)); } emit(BRW_OPCODE_ENDIF); /* Write transform feedback data for all processed vertices. */ for (int i = 0; i < c->gp->program.VerticesOut; i++) { emit(MOV(dst_reg(sol_temp), i)); emit(CMP(dst_null_d(), sol_temp, this->vertex_count, BRW_CONDITIONAL_L)); emit(IF(BRW_PREDICATE_NORMAL)); { xfb_program(i, num_verts); } emit(BRW_OPCODE_ENDIF); } }