void vec4_tcs_visitor::emit_thread_end() { vec4_instruction *inst; current_annotation = "thread end"; if (nir->info->tcs.vertices_out % 2) { emit(BRW_OPCODE_ENDIF); } if (devinfo->gen == 7) { struct brw_tcs_prog_data *tcs_prog_data = (struct brw_tcs_prog_data *) prog_data; current_annotation = "release input vertices"; /* Synchronize all threads, so we know that no one is still * using the input URB handles. */ if (tcs_prog_data->instances > 1) { dst_reg header = dst_reg(this, glsl_type::uvec4_type); emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header); emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header)); } /* Make thread 0 (invocations <1, 0>) release pairs of ICP handles. * We want to compare the bottom half of invocation_id with 0, but * use that truth value for the top half as well. Unfortunately, * we don't have stride in the vec4 world, nor UV immediates in * align16, so we need an opcode to get invocation_id<0,4,0>. */ set_condmod(BRW_CONDITIONAL_Z, emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), invocation_id)); emit(IF(BRW_PREDICATE_NORMAL)); for (unsigned i = 0; i < key->input_vertices; i += 2) { /* If we have an odd number of input vertices, the last will be * unpaired. We don't want to use an interleaved URB write in * that case. */ const bool is_unpaired = i == key->input_vertices - 1; dst_reg header(this, glsl_type::uvec4_type); emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i), brw_imm_ud(is_unpaired)); } emit(BRW_OPCODE_ENDIF); } if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME)) emit_shader_time_end(); inst = emit(TCS_OPCODE_THREAD_END); inst->base_mrf = 14; inst->mlen = 2; }
void vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) { switch (instr->intrinsic) { case nir_intrinsic_load_invocation_id: emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD), invocation_id)); break; case nir_intrinsic_load_primitive_id: emit(TCS_OPCODE_GET_PRIMITIVE_ID, get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); break; case nir_intrinsic_load_patch_vertices_in: emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), brw_imm_d(key->input_vertices))); break; case nir_intrinsic_load_per_vertex_input: { src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]); src_reg vertex_index = vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0])) : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); emit_input_urb_read(dst, vertex_index, imm_offset, nir_intrinsic_component(instr), indirect_offset); break; } case nir_intrinsic_load_input: unreachable("nir_lower_io should use load_per_vertex_input intrinsics"); break; case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: { src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); if (imm_offset == 0 && indirect_offset.file == BAD_FILE) { dst.type = BRW_REGISTER_TYPE_F; /* This is a read of gl_TessLevelInner[], which lives in the * Patch URB header. The layout depends on the domain. */ switch (key->tes_primitive_mode) { case GL_QUADS: { /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */ dst_reg tmp(this, glsl_type::vec4_type); emit_output_urb_read(tmp, 0, 0, src_reg()); emit(MOV(writemask(dst, WRITEMASK_XY), swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); break; } case GL_TRIANGLES: /* DWord 4; use offset 1 but normal swizzle/writemask. */ emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0, src_reg()); break; case GL_ISOLINES: /* All channels are undefined. */ return; default: unreachable("Bogus tessellation domain"); } } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) { dst.type = BRW_REGISTER_TYPE_F; unsigned swiz = BRW_SWIZZLE_WZYX; /* This is a read of gl_TessLevelOuter[], which lives in the * high 4 DWords of the Patch URB header, in reverse order. */ switch (key->tes_primitive_mode) { case GL_QUADS: dst.writemask = WRITEMASK_XYZW; break; case GL_TRIANGLES: dst.writemask = WRITEMASK_XYZ; break; case GL_ISOLINES: /* Isolines are not reversed; swizzle .zw -> .xy */ swiz = BRW_SWIZZLE_ZWZW; dst.writemask = WRITEMASK_XY; return; default: unreachable("Bogus tessellation domain"); } dst_reg tmp(this, glsl_type::vec4_type); emit_output_urb_read(tmp, 1, 0, src_reg()); emit(MOV(dst, swizzle(src_reg(tmp), swiz))); } else { emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr), indirect_offset); } break; } case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: { src_reg value = get_nir_src(instr->src[0]); unsigned mask = instr->const_index[1]; unsigned swiz = BRW_SWIZZLE_XYZW; src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; /* The passthrough shader writes the whole patch header as two vec4s; * skip all the gl_TessLevelInner/Outer swizzling. */ if (indirect_offset.file == BAD_FILE && !is_passthrough_shader) { if (imm_offset == 0) { value.type = BRW_REGISTER_TYPE_F; mask &= (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1; /* This is a write to gl_TessLevelInner[], which lives in the * Patch URB header. The layout depends on the domain. */ switch (key->tes_primitive_mode) { case GL_QUADS: /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed). * We use an XXYX swizzle to reverse put .xy in the .wz * channels, and use a .zw writemask. */ swiz = BRW_SWIZZLE4(0, 0, 1, 0); mask = writemask_for_backwards_vector(mask); break; case GL_TRIANGLES: /* gl_TessLevelInner[].x lives at DWord 4, so we set the * writemask to X and bump the URB offset by 1. */ imm_offset = 1; break; case GL_ISOLINES: /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */ return; default: unreachable("Bogus tessellation domain"); } } else if (imm_offset == 1) { value.type = BRW_REGISTER_TYPE_F; mask &= (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1; /* This is a write to gl_TessLevelOuter[] which lives in the * Patch URB Header at DWords 4-7. However, it's reversed, so * instead of .xyzw we have .wzyx. */ if (key->tes_primitive_mode == GL_ISOLINES) { /* Isolines .xy should be stored in .zw, in order. */ swiz = BRW_SWIZZLE4(0, 0, 0, 1); mask <<= 2; } else { /* Other domains are reversed; store .wzyx instead of .xyzw. */ swiz = BRW_SWIZZLE_WZYX; mask = writemask_for_backwards_vector(mask); } } } unsigned first_component = nir_intrinsic_component(instr); if (first_component) { assert(swiz == BRW_SWIZZLE_XYZW); swiz = BRW_SWZ_COMP_OUTPUT(first_component); mask = mask << first_component; } emit_urb_write(swizzle(value, swiz), mask, imm_offset, indirect_offset); break; } case nir_intrinsic_barrier: { dst_reg header = dst_reg(this, glsl_type::uvec4_type); emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header); emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header)); break; } default: vec4_visitor::nir_emit_intrinsic(instr); } }
void vec4_gs_visitor::gs_emit_vertex(int stream_id) { this->current_annotation = "emit vertex: safety check"; /* Haswell and later hardware ignores the "Render Stream Select" bits * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled, * and instead sends all primitives down the pipeline for rasterization. * If the SOL stage is enabled, "Render Stream Select" is honored and * primitives bound to non-zero streams are discarded after stream output. * * Since the only purpose of primives sent to non-zero streams is to * be recorded by transform feedback, we can simply discard all geometry * bound to these streams when transform feedback is disabled. */ if (stream_id > 0 && !nir->info->has_transform_feedback_varyings) return; /* If we're outputting 32 control data bits or less, then we can wait * until the shader is over to output them all. Otherwise we need to * output them as we go. Now is the time to do it, since we're about to * output the vertex_count'th vertex, so it's guaranteed that the * control data bits associated with the (vertex_count - 1)th vertex are * correct. */ if (c->control_data_header_size_bits > 32) { this->current_annotation = "emit vertex: emit control data bits"; /* Only emit control data bits if we've finished accumulating a batch * of 32 bits. This is the case when: * * (vertex_count * bits_per_vertex) % 32 == 0 * * (in other words, when the last 5 bits of vertex_count * * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some * integer n (which is always the case, since bits_per_vertex is * always 1 or 2), this is equivalent to requiring that the last 5-n * bits of vertex_count are 0: * * vertex_count & (2^(5-n) - 1) == 0 * * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is * equivalent to: * * vertex_count & (32 / bits_per_vertex - 1) == 0 */ vec4_instruction *inst = emit(AND(dst_null_ud(), this->vertex_count, brw_imm_ud(32 / c->control_data_bits_per_vertex - 1))); inst->conditional_mod = BRW_CONDITIONAL_Z; emit(IF(BRW_PREDICATE_NORMAL)); { /* If vertex_count is 0, then no control data bits have been * accumulated yet, so we skip emitting them. */ emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_NEQ)); emit(IF(BRW_PREDICATE_NORMAL)); emit_control_data_bits(); emit(BRW_OPCODE_ENDIF); /* Reset control_data_bits to 0 so we can start accumulating a new * batch. * * Note: in the case where vertex_count == 0, this neutralizes the * effect of any call to EndPrimitive() that the shader may have * made before outputting its first vertex. */ inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u))); inst->force_writemask_all = true; } emit(BRW_OPCODE_ENDIF); } this->current_annotation = "emit vertex: vertex data"; emit_vertex(); /* In stream mode we have to set control data bits for all vertices * unless we have disabled control data bits completely (which we do * do for GL_POINTS outputs that don't use streams). */ if (c->control_data_header_size_bits > 0 && gs_prog_data->control_data_format == GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { this->current_annotation = "emit vertex: Stream control data bits"; set_stream_control_data_bits(stream_id); } this->current_annotation = NULL; }