static bool
constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
{
   bool progress = false;

   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
      if (tail->deref_type != nir_deref_type_array)
         continue;

      nir_deref_array *arr = nir_deref_as_array(tail);

      if (arr->deref_array_type == nir_deref_array_type_indirect &&
          arr->indirect.is_ssa &&
          arr->indirect.ssa->parent_instr->type == nir_instr_type_load_const) {
         nir_load_const_instr *indirect =
            nir_instr_as_load_const(arr->indirect.ssa->parent_instr);

         arr->base_offset += indirect->value.u[0];

         /* Clear out the source */
         nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL));

         arr->deref_array_type = nir_deref_array_type_direct;

         progress = true;
      }
   }

   return progress;
}
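/* For context, a minimal sketch (not part of the excerpt above) of how
 * constant_fold_deref() might be driven for intrinsic instructions: each of
 * the intrinsic's variable derefs is folded in turn.  The helper name
 * constant_fold_intrinsic_instr and the exact iteration are assumptions
 * based on the nir_intrinsic_infos metadata available in this era.
 */
static bool
constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
{
   bool progress = false;

   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
   for (unsigned i = 0; i < num_vars; i++)
      progress |= constant_fold_deref(&instr->instr, instr->variables[i]);

   return progress;
}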
static bool
constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
{
   nir_const_value src[4];

   if (!instr->dest.dest.is_ssa)
      return false;

   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      if (!instr->src[i].src.is_ssa)
         return false;

      nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;

      if (src_instr->type != nir_instr_type_load_const)
         return false;
      nir_load_const_instr *load_const = nir_instr_as_load_const(src_instr);

      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
           j++) {
         src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
      }

      /* We shouldn't have any source modifiers in the optimization loop. */
      assert(!instr->src[i].abs && !instr->src[i].negate);
   }

   /* We shouldn't have any saturate modifiers in the optimization loop. */
   assert(!instr->dest.saturate);

   nir_const_value dest =
      nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
                            src);

   nir_load_const_instr *new_instr =
      nir_load_const_instr_create(mem_ctx,
                                  instr->dest.dest.ssa.num_components);

   new_instr->value = dest;

   nir_instr_insert_before(&instr->instr, &new_instr->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
                            nir_src_for_ssa(&new_instr->def), mem_ctx);

   nir_instr_remove(&instr->instr);
   ralloc_free(instr);

   return true;
}
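/* A hedged sketch of the per-block driver loop that would invoke the two
 * folding helpers above; the function name constant_fold_block is an
 * assumption.  nir_foreach_instr_safe is required because folding an ALU
 * instruction removes it from the block mid-iteration.
 */
static bool
constant_fold_block(nir_block *block, void *mem_ctx)
{
   bool progress = false;

   nir_foreach_instr_safe(block, instr) {
      switch (instr->type) {
      case nir_instr_type_alu:
         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
         break;
      case nir_instr_type_intrinsic:
         progress |=
            constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr));
         break;
      default:
         /* Don't know how to constant fold anything else. */
         break;
      }
   }

   return progress;
}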
static void
validate_instr(nir_instr *instr, validate_state *state)
{
   assert(instr->block == state->block);

   state->instr = instr;

   switch (instr->type) {
   case nir_instr_type_alu:
      validate_alu_instr(nir_instr_as_alu(instr), state);
      break;

   case nir_instr_type_call:
      validate_call_instr(nir_instr_as_call(instr), state);
      break;

   case nir_instr_type_intrinsic:
      validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
      break;

   case nir_instr_type_tex:
      validate_tex_instr(nir_instr_as_tex(instr), state);
      break;

   case nir_instr_type_load_const:
      validate_load_const_instr(nir_instr_as_load_const(instr), state);
      break;

   case nir_instr_type_phi:
      validate_phi_instr(nir_instr_as_phi(instr), state);
      break;

   case nir_instr_type_ssa_undef:
      validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
      break;

   case nir_instr_type_jump:
      break;

   default:
      /* This switch dispatches on every instruction type, not just ALU
       * instructions, so the assertion message should say so.
       */
      assert(!"Invalid instruction type");
      break;
   }

   state->instr = NULL;
}
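/* A hedged sketch of the block-level walk that feeds validate_instr().
 * The real validator also checks block successors/predecessors and phi
 * placement, which is omitted here; validate_block is the assumed name of
 * the caller.
 */
static void
validate_block(nir_block *block, validate_state *state)
{
   state->block = block;

   nir_foreach_instr(block, instr) {
      assert(instr->block == block);
      validate_instr(instr, state);
   }
}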
nir_foreach_instr(block, instr) {
   switch (instr->type) {
   case nir_instr_type_alu: {
      /* For ALU instructions, the resolve status is handled in a
       * three-step process.
       *
       * 1) Look at the instruction type and sources and determine if it
       *    can be left unresolved.
       *
       * 2) Look at the destination and see if we have to resolve
       *    anyway.  (This is the case if this instruction is not the
       *    only instruction writing to a given register.)
       *
       * 3) If the instruction has a resolve status other than
       *    BOOL_UNRESOLVED or BOOL_NEEDS_RESOLVE then we walk through
       *    the sources and ensure that they are also resolved.  This
       *    ensures that we don't end up with any stray unresolved
       *    booleans going into ADDs or something like that.
       */

      uint8_t resolve_status;
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      switch (alu->op) {
      case nir_op_bany2:
      case nir_op_bany3:
      case nir_op_bany4:
      case nir_op_ball_fequal2:
      case nir_op_ball_iequal2:
      case nir_op_ball_fequal3:
      case nir_op_ball_iequal3:
      case nir_op_ball_fequal4:
      case nir_op_ball_iequal4:
      case nir_op_bany_fnequal2:
      case nir_op_bany_inequal2:
      case nir_op_bany_fnequal3:
      case nir_op_bany_inequal3:
      case nir_op_bany_fnequal4:
      case nir_op_bany_inequal4:
         /* These are only implemented by the vec4 backend and its
          * implementation emits resolved booleans.  At some point in the
          * future, this may change and we'll have to remove some of the
          * above cases.
          */
         resolve_status = BRW_NIR_BOOLEAN_NO_RESOLVE;
         break;

      case nir_op_imov:
      case nir_op_inot:
         /* This is a single-source instruction.  Just copy the resolve
          * status from the source.
          */
         resolve_status = get_resolve_status_for_src(&alu->src[0].src);
         break;

      case nir_op_iand:
      case nir_op_ior:
      case nir_op_ixor: {
         uint8_t src0_status = get_resolve_status_for_src(&alu->src[0].src);
         uint8_t src1_status = get_resolve_status_for_src(&alu->src[1].src);

         if (src0_status == src1_status) {
            resolve_status = src0_status;
         } else if (src0_status == BRW_NIR_NON_BOOLEAN ||
                    src1_status == BRW_NIR_NON_BOOLEAN) {
            /* If one of the sources is a non-boolean then the whole
             * thing is a non-boolean.
             */
            resolve_status = BRW_NIR_NON_BOOLEAN;
         } else {
            /* At this point one of them is a true boolean and one is a
             * boolean that needs a resolve.  We could either resolve the
             * unresolved source or we could resolve here.  If we resolve
             * the unresolved source then we get two resolves for the
             * price of one.  Just set this one to BOOLEAN_NO_RESOLVE and
             * we'll let the code below force a resolve on the unresolved
             * source.
             */
            resolve_status = BRW_NIR_BOOLEAN_NO_RESOLVE;
         }
         break;
      }

      default:
         if (nir_op_infos[alu->op].output_type == nir_type_bool) {
            /* These instructions will turn into a CMP when we actually
             * emit them so the result will have to be resolved before it
             * can be used.
             */
            resolve_status = BRW_NIR_BOOLEAN_UNRESOLVED;

            /* Even though the destination is allowed to be left
             * unresolved, the sources are treated as regular integers or
             * floats so they need to be resolved.
             */
            nir_foreach_src(instr, src_mark_needs_resolve, NULL);
         } else {
            resolve_status = BRW_NIR_NON_BOOLEAN;
         }
      }

      /* If the destination is SSA, go ahead and allow unresolved booleans.
       * If the destination register doesn't have a well-defined
       * parent_instr we need to resolve immediately.
       */
      if (!alu->dest.dest.is_ssa &&
          resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) {
         resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE;
      }

      instr->pass_flags = (instr->pass_flags & ~BRW_NIR_BOOLEAN_MASK) |
                          resolve_status;

      /* Finally, resolve sources if it's needed */
      switch (resolve_status) {
      case BRW_NIR_BOOLEAN_NEEDS_RESOLVE:
      case BRW_NIR_BOOLEAN_UNRESOLVED:
         /* This instruction is either unresolved or we're doing the
          * resolve here; leave the sources alone.
          */
         break;

      case BRW_NIR_BOOLEAN_NO_RESOLVE:
      case BRW_NIR_NON_BOOLEAN:
         nir_foreach_src(instr, src_mark_needs_resolve, NULL);
         break;

      default:
         unreachable("Invalid boolean flag");
      }

      break;
   }

   case nir_instr_type_load_const: {
      nir_load_const_instr *load = nir_instr_as_load_const(instr);

      /* For load_const instructions, it's a boolean exactly when it holds
       * one of the values NIR_TRUE or NIR_FALSE.
       *
       * Since load_const instructions don't have any sources, we don't
       * have to worry about resolving them.
       */
      instr->pass_flags &= ~BRW_NIR_BOOLEAN_MASK;
      if (load->value.u[0] == NIR_TRUE || load->value.u[0] == NIR_FALSE) {
         instr->pass_flags |= BRW_NIR_BOOLEAN_NO_RESOLVE;
      } else {
         instr->pass_flags |= BRW_NIR_NON_BOOLEAN;
      }
      continue;
   }

   default:
      /* Everything else is an unknown non-boolean value and needs to
       * have all sources resolved.
       */
      instr->pass_flags = (instr->pass_flags & ~BRW_NIR_BOOLEAN_MASK) |
                          BRW_NIR_NON_BOOLEAN;
      nir_foreach_src(instr, src_mark_needs_resolve, NULL);
      continue;
   }
}
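/* The analysis above leans on the src_mark_needs_resolve() callback.  The
 * following is a hedged sketch consistent with how it is used here: any
 * source whose producer was going to leave its boolean unresolved gets
 * upgraded to BRW_NIR_BOOLEAN_NEEDS_RESOLVE so the producer will emit the
 * resolve.  The exact body is an assumption, not a verbatim quote.
 */
static bool
src_mark_needs_resolve(nir_src *src, void *void_state)
{
   if (src->is_ssa) {
      nir_instr *src_instr = src->ssa->parent_instr;
      uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;

      /* Force an unresolved boolean producer to resolve instead. */
      if (resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) {
         src_instr->pass_flags =
            (src_instr->pass_flags & ~BRW_NIR_BOOLEAN_MASK) |
            BRW_NIR_BOOLEAN_NEEDS_RESOLVE;
      }
   }

   return true;
}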
static bool
constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
{
   nir_const_value src[NIR_MAX_VEC_COMPONENTS];

   if (!instr->dest.dest.is_ssa)
      return false;

   /* In the case that any outputs/inputs have unsized types, then we need
    * to guess the bit-size.  In this case, the validator ensures that all
    * bit-sizes match so we can just take the bit-size from the first
    * output/input with an unsized type.  If all the outputs/inputs are
    * sized then we don't need to guess the bit-size at all because the
    * code we generate for constant opcodes in this case already knows the
    * sizes of the types involved and does not need the provided bit-size
    * for anything (although it still needs to be passed a valid bit-size).
    */
   unsigned bit_size = 0;
   if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type))
      bit_size = instr->dest.dest.ssa.bit_size;

   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      if (!instr->src[i].src.is_ssa)
         return false;

      if (bit_size == 0 &&
          !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_types[i])) {
         bit_size = instr->src[i].src.ssa->bit_size;
      }

      nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;

      if (src_instr->type != nir_instr_type_load_const)
         return false;
      nir_load_const_instr *load_const = nir_instr_as_load_const(src_instr);

      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
           j++) {
         switch (load_const->def.bit_size) {
         case 64:
            src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
            break;
         case 32:
            src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
            break;
         case 16:
            src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]];
            break;
         case 8:
            src[i].u8[j] = load_const->value.u8[instr->src[i].swizzle[j]];
            break;
         default:
            unreachable("Invalid bit size");
         }
      }

      /* We shouldn't have any source modifiers in the optimization loop. */
      assert(!instr->src[i].abs && !instr->src[i].negate);
   }

   if (bit_size == 0)
      bit_size = 32;

   /* We shouldn't have any saturate modifiers in the optimization loop. */
   assert(!instr->dest.saturate);

   nir_const_value dest =
      nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
                            bit_size, src);

   nir_load_const_instr *new_instr =
      nir_load_const_instr_create(mem_ctx,
                                  instr->dest.dest.ssa.num_components,
                                  instr->dest.dest.ssa.bit_size);

   new_instr->value = dest;

   nir_instr_insert_before(&instr->instr, &new_instr->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
                            nir_src_for_ssa(&new_instr->def));

   nir_instr_remove(&instr->instr);
   ralloc_free(instr);

   return true;
}
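/* A hedged sketch of how this later version of the pass might be wired up
 * end to end, assuming a per-block loop of the same shape as the
 * constant_fold_block sketch earlier.  The helper names and the exact
 * metadata flags preserved are assumptions based on the usual NIR pass
 * structure of this era.
 */
static bool
nir_opt_constant_folding_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      progress |= constant_fold_block(block, ralloc_parent(impl));
   }

   /* Folding only replaces instructions in place, so block layout and
    * dominance information remain valid.
    */
   if (progress)
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);

   return progress;
}

bool
nir_opt_constant_folding(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= nir_opt_constant_folding_impl(function->impl);
   }

   return progress;
}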
nir_foreach_instr_safe(block, instr) {
   if (instr->type == nir_instr_type_load_const)
      lower_load_const_instr_scalar(nir_instr_as_load_const(instr));
}
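/* A hedged sketch of what lower_load_const_instr_scalar() does in this era
 * of the code base: split a vector load_const into per-component scalar
 * load_consts and recombine them with nir_vec via nir_builder.  The
 * builder setup details are assumptions.
 */
static void
lower_load_const_instr_scalar(nir_load_const_instr *lower)
{
   if (lower->def.num_components == 1)
      return;

   nir_builder b;
   nir_builder_init(&b, nir_cf_node_get_function(&lower->instr.block->cf_node)->function->impl);
   b.cursor = nir_before_instr(&lower->instr);

   /* Emit the individual component loads. */
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < lower->def.num_components; i++) {
      nir_load_const_instr *load_comp =
         nir_load_const_instr_create(b.shader, 1);
      load_comp->value.u[0] = lower->value.u[i];
      nir_builder_instr_insert(&b, &load_comp->instr);
      loads[i] = &load_comp->def;
   }

   /* Batch the components back together into a vector. */
   nir_ssa_def *vec = nir_vec(&b, loads, lower->def.num_components);

   /* Replace the old load with a reference to our reconstructed vector. */
   nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec), b.shader);
   nir_instr_remove(&lower->instr);
}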
static bool
match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
            unsigned num_components, const uint8_t *swizzle,
            struct match_state *state)
{
   uint8_t new_swizzle[4];

   /* If the source is an explicitly sized source, then we need to reset
    * both the number of components and the swizzle.
    */
   if (nir_op_infos[instr->op].input_sizes[src] != 0) {
      num_components = nir_op_infos[instr->op].input_sizes[src];
      swizzle = identity_swizzle;
   }

   for (unsigned i = 0; i < num_components; ++i)
      new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];

   switch (value->type) {
   case nir_search_value_expression:
      if (!instr->src[src].src.is_ssa)
         return false;

      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
         return false;

      return match_expression(nir_search_value_as_expression(value),
                              nir_instr_as_alu(instr->src[src].src.ssa->parent_instr),
                              num_components, new_swizzle, state);

   case nir_search_value_variable: {
      nir_search_variable *var = nir_search_value_as_variable(value);
      assert(var->variable < NIR_SEARCH_MAX_VARIABLES);

      if (state->variables_seen & (1 << var->variable)) {
         if (!nir_srcs_equal(state->variables[var->variable].src,
                             instr->src[src].src))
            return false;

         assert(!instr->src[src].abs && !instr->src[src].negate);

         for (unsigned i = 0; i < num_components; ++i) {
            if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
               return false;
         }

         return true;
      } else {
         if (var->is_constant &&
             instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
            return false;

         if (var->type != nir_type_invalid) {
            if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
               return false;

            nir_alu_instr *src_alu =
               nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);

            if (nir_op_infos[src_alu->op].output_type != var->type &&
                !(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))
               return false;
         }

         state->variables_seen |= (1 << var->variable);
         state->variables[var->variable].src = instr->src[src].src;
         state->variables[var->variable].abs = false;
         state->variables[var->variable].negate = false;

         for (unsigned i = 0; i < 4; ++i) {
            if (i < num_components)
               state->variables[var->variable].swizzle[i] = new_swizzle[i];
            else
               state->variables[var->variable].swizzle[i] = 0;
         }

         return true;
      }
   }

   case nir_search_value_constant: {
      nir_search_constant *const_val = nir_search_value_as_constant(value);

      if (!instr->src[src].src.is_ssa)
         return false;

      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
         return false;

      nir_load_const_instr *load =
         nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);

      switch (nir_op_infos[instr->op].input_types[src]) {
      case nir_type_float:
         for (unsigned i = 0; i < num_components; ++i) {
            if (load->value.f[new_swizzle[i]] != const_val->data.f)
               return false;
         }
         return true;

      case nir_type_int:
      case nir_type_unsigned:
      case nir_type_bool:
         for (unsigned i = 0; i < num_components; ++i) {
            if (load->value.i[new_swizzle[i]] != const_val->data.i)
               return false;
         }
         return true;

      default:
         unreachable("Invalid alu source type");
      }
   }

   default:
      unreachable("Invalid search value type");
   }
}
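/* match_value() recurses through match_expression(); a simplified, hedged
 * sketch of that partner function is below.  The real version also retries
 * commutative two-source operations with the sources swapped, which is
 * omitted here for brevity, and the struct field names are assumptions.
 */
static bool
match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
                 unsigned num_components, const uint8_t *swizzle,
                 struct match_state *state)
{
   if (instr->op != expr->opcode)
      return false;

   assert(!instr->dest.saturate);
   assert(nir_op_infos[instr->op].num_inputs > 0);

   /* If we have an explicitly sized destination, we can only handle the
    * identity swizzle.
    */
   if (nir_op_infos[instr->op].output_size != 0 &&
       memcmp(swizzle, identity_swizzle, num_components) != 0)
      return false;

   /* Match each source against the corresponding pattern value. */
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      if (!match_value(expr->srcs[i], instr, i, num_components,
                       swizzle, state))
         return false;
   }

   return true;
}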