/**
 * Emit vec4 IR for a NIR intrinsic in a tessellation evaluation shader.
 *
 * The tess coord, outer/inner tess level, primitive ID and (per-vertex)
 * input load intrinsics are translated here; every other intrinsic is
 * forwarded to vec4_visitor::nir_emit_intrinsic().
 *
 * \param instr  the NIR intrinsic instruction to translate
 */
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord: {
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      const src_reg tess_coord = src_reg(brw_vec8_grf(1, 0));
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), tess_coord));
      break;
   }

   case nir_intrinsic_load_tess_level_outer: {
      /* Both domains read ATTR slot 1; only the channel order differs. */
      const unsigned outer_swz =
         tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE ?
         BRW_SWIZZLE_ZWZW : BRW_SWIZZLE_WZYX;

      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), outer_swz)));
      break;
   }

   case nir_intrinsic_load_tess_level_inner: {
      /* Quads take a reversed vec4 from ATTR slot 0; the other domains
       * take a float-typed source from ATTR slot 1.
       */
      const src_reg inner_levels =
         tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD ?
         swizzle(src_reg(ATTR, 0, glsl_type::vec4_type), BRW_SWIZZLE_WZYX) :
         src_reg(ATTR, 1, glsl_type::float_type);

      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), inner_levels));
      break;
   }

   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      const src_reg indirect_offset = get_indirect_offset(instr);
      const unsigned imm_offset = instr->const_index[0];
      const bool is_64bit = nir_dest_bit_size(instr->dest) == 64;

      /* For 64-bit loads the component index is halved. */
      const unsigned component = nir_intrinsic_component(instr);
      const unsigned first_component = is_64bit ? component / 2 : component;

      /* Arbitrarily only push up to 24 vec4 slots worth of data,
       * which is 12 registers (since each holds 2 vec4 slots).
       */
      const unsigned max_push_slots = 24;

      src_reg header = input_read_header;

      if (indirect_offset.file != BAD_FILE) {
         /* Indirect access: build a fresh header carrying the offset. */
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else if (imm_offset < max_push_slots) {
         /* Direct access to a pushed slot: read it straight from ATTR. */
         const glsl_type *pushed_type =
            is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
         src_reg pushed = src_reg(ATTR, imm_offset, pushed_type);
         pushed.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         const brw_reg_type pushed_dst_type =
            is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
         emit(MOV(get_nir_dest(instr->dest, pushed_dst_type), pushed));

         /* Grow the URB read length so it covers the slot(s) just used. */
         const unsigned slots_used = is_64bit ? 2 : 1;
         prog_data->urb_read_length =
            MAX2(prog_data->urb_read_length,
                 DIV_ROUND_UP(imm_offset + slots_used, 2));
         break;
      }

      if (is_64bit) {
         /* For 64-bit we need to load twice as many 32-bit components, and
          * for dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg pulled(this, glsl_type::dvec4_type);
         dst_reg pulled_d = retype(pulled, BRW_REGISTER_TYPE_D);

         vec4_instruction *urb_read =
            emit(VEC4_OPCODE_URB_READ, pulled_d, src_reg(header));
         urb_read->offset = imm_offset;
         urb_read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            urb_read = emit(VEC4_OPCODE_URB_READ,
                            byte_offset(pulled_d, REG_SIZE),
                            src_reg(header));
            urb_read->offset = imm_offset + 1;
            urb_read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg pulled_src = src_reg(pulled);
         pulled_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, pulled_src, false);

         dst_reg result = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         result.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(result, src_reg(shuffled)));
      } else {
         dst_reg pulled(this, glsl_type::ivec4_type);

         vec4_instruction *urb_read =
            emit(VEC4_OPCODE_URB_READ, pulled, src_reg(header));
         urb_read->offset = imm_offset;
         urb_read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg pulled_src = src_reg(pulled);
         pulled_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks
          * landing in here, but we really don't want them in the above
          * pseudo-ops.
          */
         dst_reg result = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         result.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(result, pulled_src));
      }
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
void vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, unsigned base_offset, unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; /* Set up the message header to reference the proper parts of the URB */ dst_reg header = dst_reg(this, glsl_type::uvec4_type); inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header, brw_imm_ud(dst.writemask << first_component), indirect_offset); inst->force_writemask_all = true; vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header)); read->offset = base_offset; read->mlen = 1; read->base_mrf = -1; if (first_component) { /* Read into a temporary and copy with a swizzle and writemask. */ read->dst = retype(dst_reg(this, glsl_type::ivec4_type), dst.type); emit(MOV(dst, swizzle(src_reg(read->dst), BRW_SWZ_COMP_INPUT(first_component)))); } }
void vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, const src_reg &vertex_index, unsigned base_offset, unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; dst_reg temp(this, glsl_type::ivec4_type); temp.type = dst.type; /* Set up the message header to reference the proper parts of the URB */ dst_reg header = dst_reg(this, glsl_type::uvec4_type); inst = emit(TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index, indirect_offset); inst->force_writemask_all = true; /* Read into a temporary, ignoring writemasking. */ inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); inst->offset = base_offset; inst->mlen = 1; inst->base_mrf = -1; /* Copy the temporary to the destination to deal with writemasking. * * Also attempt to deal with gl_PointSize being in the .w component. */ if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW))); } else { src_reg src = src_reg(temp); src.swizzle = BRW_SWZ_COMP_INPUT(first_component); emit(MOV(dst, src)); } }
/**
 * Emit vec4 IR for a NIR intrinsic in a geometry shader.
 *
 * Per-vertex input loads, the intrinsics that carry the vertex count, and
 * the primitive/invocation ID loads are translated here; anything else is
 * passed on to vec4_visitor::nir_emit_intrinsic().
 *
 * \param instr  the NIR intrinsic instruction to translate
 */
void
vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input: {
      /* The EmitNoIndirectInput flag guarantees our vertex index will
       * be constant.  We should handle indirects someday.
       */
      nir_const_value *const vertex_const =
         nir_src_as_const_value(instr->src[0]);
      nir_const_value *const offset_const =
         nir_src_as_const_value(instr->src[1]);

      /* Make up a type...we have no way of knowing... */
      const glsl_type *const made_up_type =
         glsl_type::ivec(instr->num_components);

      src_reg input = src_reg(ATTR,
                              BRW_VARYING_SLOT_COUNT * vertex_const->u32[0] +
                              instr->const_index[0] + offset_const->u32[0],
                              made_up_type);

      /* gl_PointSize is passed in the .w component of the VUE header;
       * everything else is selected by the intrinsic's component index.
       */
      const unsigned component = nir_intrinsic_component(instr);
      if (instr->const_index[0] == VARYING_SLOT_PSIZ)
         input.swizzle = BRW_SWIZZLE_WWWW;
      else
         input.swizzle = BRW_SWZ_COMP_INPUT(component);

      dst_reg result = get_nir_dest(instr->dest, input.type);
      result.writemask = brw_writemask_for_size(instr->num_components);
      emit(MOV(result, input));
      break;
   }

   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should have produced per_vertex intrinsics");

   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_end_primitive_with_counter:
   case nir_intrinsic_set_vertex_count:
      /* All three intrinsics carry the vertex count in their first source;
       * latch it before doing any intrinsic-specific work.
       */
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);

      if (instr->intrinsic == nir_intrinsic_emit_vertex_with_counter) {
         const int stream_id = instr->const_index[0];
         gs_emit_vertex(stream_id);
      } else if (instr->intrinsic ==
                 nir_intrinsic_end_primitive_with_counter) {
         gs_end_primitive();
      }
      break;

   case nir_intrinsic_load_primitive_id: {
      assert(gs_prog_data->include_primitive_id);
      const dst_reg result = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      emit(MOV(result, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
      break;
   }

   case nir_intrinsic_load_invocation_id: {
      const src_reg invocation_id =
         src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
      assert(invocation_id.file != BAD_FILE);

      const dst_reg result = get_nir_dest(instr->dest, invocation_id.type);
      emit(MOV(result, invocation_id));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}