void gen6_gs_visitor::xfb_setup() { static const unsigned swizzle_for_offset[4] = { BRW_SWIZZLE4(0, 1, 2, 3), BRW_SWIZZLE4(1, 2, 3, 3), BRW_SWIZZLE4(2, 3, 3, 3), BRW_SWIZZLE4(3, 3, 3, 3) }; struct brw_gs_prog_data *prog_data = (struct brw_gs_prog_data *) &c->prog_data; const struct gl_transform_feedback_info *linked_xfb_info = &this->shader_prog->LinkedTransformFeedback; int i; /* Make sure that the VUE slots won't overflow the unsigned chars in * prog_data->transform_feedback_bindings[]. */ STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); /* Make sure that we don't need more binding table entries than we've * set aside for use in transform feedback. (We shouldn't, since we * set aside enough binding table entries to have one per component). */ assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) { prog_data->transform_feedback_bindings[i] = linked_xfb_info->Outputs[i].OutputRegister; prog_data->transform_feedback_swizzles[i] = swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; } }
static bool try_copy_propagate(const struct brw_device_info *devinfo, vec4_instruction *inst, int arg, struct copy_entry *entry) { /* For constant propagation, we only handle the same constant * across all 4 channels. Some day, we should handle the 8-bit * float vector format, which would let us constant propagate * vectors better. */ src_reg value = *entry->value[0]; for (int i = 1; i < 4; i++) { /* This is equals() except we don't care about the swizzle. */ if (value.file != entry->value[i]->file || value.reg != entry->value[i]->reg || value.reg_offset != entry->value[i]->reg_offset || value.type != entry->value[i]->type || value.negate != entry->value[i]->negate || value.abs != entry->value[i]->abs) { return false; } } /* Compute the swizzle of the original register by swizzling the * component loaded from each value according to the swizzle of * operand we're going to change. */ int s[4]; for (int i = 0; i < 4; i++) { s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, i); } value.swizzle = brw_compose_swizzle(inst->src[arg].swizzle, BRW_SWIZZLE4(s[0], s[1], s[2], s[3])); if (value.file != UNIFORM && value.file != GRF && value.file != ATTR) return false; if (devinfo->gen >= 8 && (value.negate || value.abs) && is_logic_op(inst->opcode)) { return false; } if (inst->src[arg].abs) { value.negate = false; value.abs = true; } if (inst->src[arg].negate) value.negate = !value.negate; bool has_source_modifiers = value.negate || value.abs; /* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on * instructions. 
*/ if ((has_source_modifiers || value.file == UNIFORM || value.swizzle != BRW_SWIZZLE_XYZW) && !inst->can_do_source_mods(devinfo)) return false; if (has_source_modifiers && value.type != inst->src[arg].type) return false; if (has_source_modifiers && inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE) return false; if (inst->is_3src() && value.file == UNIFORM) return false; if (inst->is_send_from_grf()) return false; /* we can't generally copy-propagate UD negations becuse we * end up accessing the resulting values as signed integers * instead. See also resolve_ud_negate(). */ if (value.negate && value.type == BRW_REGISTER_TYPE_UD) return false; /* Don't report progress if this is a noop. */ if (value.equals(inst->src[arg])) return false; const unsigned dst_saturate_mask = inst->dst.writemask & brw_apply_swizzle_to_mask(inst->src[arg].swizzle, entry->saturatemask); if (dst_saturate_mask) { /* We either saturate all or nothing. */ if (dst_saturate_mask != inst->dst.writemask) return false; /* Limit saturate propagation only to SEL with src1 bounded within 0.0 * and 1.0, otherwise skip copy propagate altogether. */ switch(inst->opcode) { case BRW_OPCODE_SEL: if (arg != 0 || inst->src[0].type != BRW_REGISTER_TYPE_F || inst->src[1].file != IMM || inst->src[1].type != BRW_REGISTER_TYPE_F || inst->src[1].fixed_hw_reg.dw1.f < 0.0 || inst->src[1].fixed_hw_reg.dw1.f > 1.0) { return false; } if (!inst->saturate) inst->saturate = true; break; default: return false; } } value.type = inst->src[arg].type; inst->src[arg] = value; return true; }
static void populate_key(struct brw_context *brw, struct brw_ff_gs_prog_key *key) { static const unsigned swizzle_for_offset[4] = { BRW_SWIZZLE4(0, 1, 2, 3), BRW_SWIZZLE4(1, 2, 3, 3), BRW_SWIZZLE4(2, 3, 3, 3), BRW_SWIZZLE4(3, 3, 3, 3) }; struct gl_context *ctx = &brw->ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG (part of VUE map) */ key->attrs = brw->vs.prog_data->base.vue_map.slots_valid; /* BRW_NEW_PRIMITIVE */ key->primitive = brw->primitive; /* _NEW_LIGHT */ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) { /* Provide consistent primitive order with brw_set_prim's * optimization of single quads to trifans. */ key->pv_first = true; } if (brw->gen >= 7) { /* On Gen7 and later, we don't use GS (yet). */ key->need_gs_prog = false; } else if (brw->gen == 6) { /* On Gen6, GS is used for transform feedback. */ /* BRW_NEW_TRANSFORM_FEEDBACK */ if (_mesa_is_xfb_active_and_unpaused(ctx)) { const struct gl_shader_program *shaderprog = ctx->Shader.CurrentProgram[MESA_SHADER_VERTEX]; const struct gl_transform_feedback_info *linked_xfb_info = &shaderprog->LinkedTransformFeedback; int i; /* Make sure that the VUE slots won't overflow the unsigned chars in * key->transform_feedback_bindings[]. */ STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); /* Make sure that we don't need more binding table entries than we've * set aside for use in transform feedback. (We shouldn't, since we * set aside enough binding table entries to have one per component). 
*/ assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); key->need_gs_prog = true; key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; for (i = 0; i < key->num_transform_feedback_bindings; ++i) { key->transform_feedback_bindings[i] = linked_xfb_info->Outputs[i].OutputRegister; key->transform_feedback_swizzles[i] = swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; } } } else { /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP * into simpler primitives. */ key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST || brw->primitive == _3DPRIM_QUADSTRIP || brw->primitive == _3DPRIM_LINELOOP); } }
void vec4_vs_visitor::emit_prolog() { dst_reg sign_recovery_shift; dst_reg normalize_factor; dst_reg es3_normalize_factor; for (int i = 0; i < VERT_ATTRIB_MAX; i++) { if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) { uint8_t wa_flags = key->gl_attrib_wa_flags[i]; dst_reg reg(ATTR, i); dst_reg reg_d = reg; reg_d.type = BRW_REGISTER_TYPE_D; dst_reg reg_ud = reg; reg_ud.type = BRW_REGISTER_TYPE_UD; /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes * come in as floating point conversions of the integer values. */ if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) { dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1; emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); } /* Do sign recovery for 2101010 formats if required. */ if (wa_flags & BRW_ATTRIB_WA_SIGN) { if (sign_recovery_shift.file == BAD_FILE) { /* shift constant: <22,22,22,30> */ sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type); emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u))); emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u))); } emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift))); emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift))); } /* Apply BGRA swizzle if required. */ if (wa_flags & BRW_ATTRIB_WA_BGRA) { src_reg temp = src_reg(reg); temp.swizzle = BRW_SWIZZLE4(2,1,0,3); emit(MOV(reg, temp)); } if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) { /* ES 3.0 has different rules for converting signed normalized * fixed-point numbers than desktop GL. 
*/ if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) { /* According to equation 2.2 of the ES 3.0 specification, * signed normalization conversion is done by: * * f = c / (2^(b-1)-1) */ if (es3_normalize_factor.file == BAD_FILE) { /* mul constant: 1 / (2^(b-1) - 1) */ es3_normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ), src_reg(1.0f / ((1<<9) - 1)))); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W), src_reg(1.0f / ((1<<1) - 1)))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg(reg_d))); emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor))); emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f)); } else { /* The following equations are from the OpenGL 3.2 specification: * * 2.1 unsigned normalization * f = c/(2^n-1) * * 2.2 signed normalization * f = (2c+1)/(2^n-1) * * Both of these share a common divisor, which is represented by * "normalize_factor" in the code below. */ if (normalize_factor.file == BAD_FILE) { /* 1 / (2^b - 1) for b=<10,10,10,2> */ normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ), src_reg(1.0f / ((1<<10) - 1)))); emit(MOV(writemask(normalize_factor, WRITEMASK_W), src_reg(1.0f / ((1<<2) - 1)))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud))); /* For signed normalization, we want the numerator to be 2c+1. */ if (wa_flags & BRW_ATTRIB_WA_SIGN) { emit(MUL(dst, src_reg(dst), src_reg(2.0f))); emit(ADD(dst, src_reg(dst), src_reg(1.0f))); } emit(MUL(dst, src_reg(dst), src_reg(normalize_factor))); } } if (wa_flags & BRW_ATTRIB_WA_SCALE) { dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud))); } } } }
/**
 * Emit vec4 IR for a NIR intrinsic in a tessellation control shader.
 *
 * Handles the TCS-specific intrinsics (invocation/primitive ID, patch vertex
 * count, per-vertex input loads, output loads and stores, barrier) and
 * forwards everything else to vec4_visitor::nir_emit_intrinsic().
 *
 * Output loads and stores at constant offsets 0 and 1 with no indirect
 * offset are treated as gl_TessLevelInner[] / gl_TessLevelOuter[] accesses
 * and are remapped to the Patch URB header layout for the tessellation
 * domain in key->tes_primitive_mode.
 *
 * \param instr  the intrinsic instruction to translate.
 */
void
vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_invocation_id:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD),
               invocation_id));
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TCS_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
               brw_imm_d(key->input_vertices)));
      break;
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      /* Use an immediate vertex index when the source is a constant. */
      nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
      src_reg vertex_index =
         vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0]))
                      : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      emit_input_urb_read(dst, vertex_index, imm_offset,
                          nir_intrinsic_component(instr), indirect_offset);
      break;
   }
   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;

         /* This is a read of gl_TessLevelInner[], which lives in the
          * Patch URB header.  The layout depends on the domain.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS: {
            /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
            dst_reg tmp(this, glsl_type::vec4_type);
            emit_output_urb_read(tmp, 0, 0, src_reg());
            emit(MOV(writemask(dst, WRITEMASK_XY),
                     swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
            break;
         }
         case GL_TRIANGLES:
            /* DWord 4; use offset 1 but normal swizzle/writemask. */
            emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0,
                                 src_reg());
            break;
         case GL_ISOLINES:
            /* All channels are undefined, so nothing needs to be read. */
            return;
         default:
            unreachable("Bogus tessellation domain");
         }
      } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;
         unsigned swiz = BRW_SWIZZLE_WZYX;

         /* This is a read of gl_TessLevelOuter[], which lives in the
          * high 4 DWords of the Patch URB header, in reverse order.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS:
            dst.writemask = WRITEMASK_XYZW;
            break;
         case GL_TRIANGLES:
            dst.writemask = WRITEMASK_XYZ;
            break;
         case GL_ISOLINES:
            /* Isolines are not reversed; swizzle .zw -> .xy.
             *
             * This must fall out of the switch (not return) so that the
             * URB read and swizzled MOV below are emitted; otherwise the
             * swizzle/writemask chosen here are never used and the
             * destination is left unwritten.
             */
            swiz = BRW_SWIZZLE_ZWZW;
            dst.writemask = WRITEMASK_XY;
            break;
         default:
            unreachable("Bogus tessellation domain");
         }

         dst_reg tmp(this, glsl_type::vec4_type);
         emit_output_urb_read(tmp, 1, 0, src_reg());
         emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
      } else {
         emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
                              indirect_offset);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      src_reg value = get_nir_src(instr->src[0]);
      unsigned mask = instr->const_index[1];
      unsigned swiz = BRW_SWIZZLE_XYZW;

      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      /* The passthrough shader writes the whole patch header as two vec4s;
       * skip all the gl_TessLevelInner/Outer swizzling.
       */
      if (indirect_offset.file == BAD_FILE && !is_passthrough_shader) {
         if (imm_offset == 0) {
            value.type = BRW_REGISTER_TYPE_F;

            /* Drop writes to components the domain does not have. */
            mask &=
               (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;

            /* This is a write to gl_TessLevelInner[], which lives in the
             * Patch URB header.  The layout depends on the domain.
             */
            switch (key->tes_primitive_mode) {
            case GL_QUADS:
               /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
                * We use an XXYX swizzle to put .xy in the .wz channels
                * (reversed), and use a .zw writemask.
                */
               swiz = BRW_SWIZZLE4(0, 0, 1, 0);
               mask = writemask_for_backwards_vector(mask);
               break;
            case GL_TRIANGLES:
               /* gl_TessLevelInner[].x lives at DWord 4, so we set the
                * writemask to X and bump the URB offset by 1.
                */
               imm_offset = 1;
               break;
            case GL_ISOLINES:
               /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
               return;
            default:
               unreachable("Bogus tessellation domain");
            }
         } else if (imm_offset == 1) {
            value.type = BRW_REGISTER_TYPE_F;

            /* Drop writes to components the domain does not have. */
            mask &=
               (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;

            /* This is a write to gl_TessLevelOuter[] which lives in the
             * Patch URB Header at DWords 4-7.  However, it's reversed, so
             * instead of .xyzw we have .wzyx.
             */
            if (key->tes_primitive_mode == GL_ISOLINES) {
               /* Isolines .xy should be stored in .zw, in order. */
               swiz = BRW_SWIZZLE4(0, 0, 0, 1);
               mask <<= 2;
            } else {
               /* Other domains are reversed; store .wzyx instead of .xyzw. */
               swiz = BRW_SWIZZLE_WZYX;
               mask = writemask_for_backwards_vector(mask);
            }
         }
      }

      /* A non-zero starting component shifts both the data and the
       * writemask; this is only expected when no tess-level swizzle was
       * applied above.
       */
      unsigned first_component = nir_intrinsic_component(instr);
      if (first_component) {
         assert(swiz == BRW_SWIZZLE_XYZW);
         swiz = BRW_SWZ_COMP_OUTPUT(first_component);
         mask = mask << first_component;
      }

      emit_urb_write(swizzle(value, swiz), mask,
                     imm_offset, indirect_offset);
      break;
   }

   case nir_intrinsic_barrier: {
      dst_reg header = dst_reg(this, glsl_type::uvec4_type);
      emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
      emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
static void populate_key( struct brw_context *brw, struct brw_gs_prog_key *key ) { static const unsigned swizzle_for_offset[4] = { BRW_SWIZZLE4(0, 1, 2, 3), BRW_SWIZZLE4(1, 2, 3, 3), BRW_SWIZZLE4(2, 3, 3, 3), BRW_SWIZZLE4(3, 3, 3, 3) }; struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ key->attrs = brw->vs.prog_data->outputs_written; /* BRW_NEW_PRIMITIVE */ key->primitive = brw->primitive; /* _NEW_LIGHT */ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) { /* Provide consistent primitive order with brw_set_prim's * optimization of single quads to trifans. */ key->pv_first = true; } /* _NEW_TRANSFORM */ key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); if (intel->gen >= 7) { /* On Gen7 and later, we don't use GS (yet). */ key->need_gs_prog = false; } else if (intel->gen == 6) { /* On Gen6, GS is used for transform feedback. */ /* _NEW_TRANSFORM_FEEDBACK */ if (ctx->TransformFeedback.CurrentObject->Active && !ctx->TransformFeedback.CurrentObject->Paused) { const struct gl_shader_program *shaderprog = ctx->Shader.CurrentVertexProgram; const struct gl_transform_feedback_info *linked_xfb_info = &shaderprog->LinkedTransformFeedback; int i; /* Make sure that the VUE slots won't overflow the unsigned chars in * key->transform_feedback_bindings[]. */ STATIC_ASSERT(BRW_VERT_RESULT_MAX <= 256); /* Make sure that we don't need more binding table entries than we've * set aside for use in transform feedback. (We shouldn't, since we * set aside enough binding table entries to have one per component). 
*/ assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); key->need_gs_prog = true; key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; for (i = 0; i < key->num_transform_feedback_bindings; ++i) { key->transform_feedback_bindings[i] = linked_xfb_info->Outputs[i].OutputRegister; key->transform_feedback_swizzles[i] = swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; } } /* On Gen6, GS is also used for rasterizer discard. */ /* _NEW_RASTERIZER_DISCARD */ if (ctx->RasterDiscard) { key->need_gs_prog = true; key->rasterizer_discard = true; } } else { /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP * into simpler primitives. */ key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST || brw->primitive == _3DPRIM_QUADSTRIP || brw->primitive == _3DPRIM_LINELOOP); } /* For testing, the environment variable INTEL_FORCE_GS can be used to * force a GS program to be used, even if it's not necessary. */ if (getenv("INTEL_FORCE_GS")) key->need_gs_prog = true; }