/**
 * Emit NIR computing a polynomial approximation of atan(y_over_x).
 *
 * The magnitude of the argument is first folded into [0, 1], an odd
 * polynomial of degree 11 is evaluated on the folded value, and the result
 * is then corrected for the folding and for the sign of the input.
 *
 * \param b         builder used to emit the instructions
 * \param y_over_x  argument of the arctangent
 * \return          SSA value holding the approximation
 */
static nir_ssa_def *
build_atan(nir_builder *b, nir_ssa_def *y_over_x)
{
   enum { NUM_TERMS = 6 };

   /* coeffs[k] multiplies x^(2k + 1):
    *
    * x    * 0.9999793128310355 - x^3  * 0.3326756418091246 +
    * x^5  * 0.1938924977115610 - x^7  * 0.1173503194786851 +
    * x^9  * 0.0536813784310406 - x^11 * 0.0121323213173444
    */
   static const float coeffs[NUM_TERMS] = {
      0.9999793128310355f,
      -0.3326756418091246f,
      0.1938924977115610f,
      -0.1173503194786851f,
      0.0536813784310406f,
      -0.0121323213173444f,
   };

   nir_ssa_def *magnitude = nir_fabs(b, y_over_x);
   nir_ssa_def *one = nir_imm_float(b, 1.0f);

   /* Range reduction: x = min(|y_over_x|, 1) / max(|y_over_x|, 1), i.e.
    * |y_over_x| when it is <= 1.0 and its reciprocal otherwise.
    */
   nir_ssa_def *numer = nir_fmin(b, magnitude, one);
   nir_ssa_def *denom = nir_fmax(b, magnitude, one);
   nir_ssa_def *x = nir_fdiv(b, numer, denom);

   /* Odd powers of x: x, x^3, x^5, ..., x^11. */
   nir_ssa_def *x_sq = nir_fmul(b, x, x);
   nir_ssa_def *odd_pow[NUM_TERMS];
   odd_pow[0] = x;
   odd_pow[1] = nir_fmul(b, x_sq, x);
   for (unsigned k = 2; k < NUM_TERMS; k++)
      odd_pow[k] = nir_fmul(b, odd_pow[k - 1], x_sq);

   /* Scale every power by its coefficient and sum the terms. */
   nir_ssa_def *terms[NUM_TERMS];
   for (unsigned k = 0; k < NUM_TERMS; k++)
      terms[k] = nir_fmul(b, odd_pow[k], nir_imm_float(b, coeffs[k]));

   nir_ssa_def *approx = build_fsum(b, terms, ARRAY_SIZE(terms));

   /* Range-reduction fixup: when 1.0 < |y_over_x| the reciprocal was fed to
    * the polynomial, so the result becomes
    * approx + (-2 * approx + pi/2) = pi/2 - approx.  The comparison is
    * turned into a float factor so that the correction is multiplied away
    * when no fixup is required.
    */
   nir_ssa_def *was_inverted = nir_b2f(b, nir_flt(b, one, magnitude));
   nir_ssa_def *minus_twice = nir_fmul(b, approx, nir_imm_float(b, -2.0f));
   nir_ssa_def *correction =
      nir_fadd(b, minus_twice, nir_imm_float(b, M_PI_2f));
   approx = nir_fadd(b, approx, nir_fmul(b, was_inverted, correction));

   /* Sign fixup: the polynomial only saw |y_over_x|. */
   return nir_fmul(b, approx, nir_fsign(b, y_over_x));
}
/**
 * Clamp selected coordinate components of a texture instruction.
 *
 * New instructions are emitted right before \p tex and every coordinate
 * source of \p tex is rewritten to use the clamped vector.
 *
 * \param b         builder used to emit the instructions
 * \param tex       texture instruction whose coordinate is clamped
 * \param sat_mask  bit j set means coordinate component j is clamped
 */
static void
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   b->cursor = nir_before_instr(&tex->instr);

   /* Only coordinate sources are touched; everything else is skipped. */
   for (unsigned s = 0; s < tex->num_srcs; s++) {
      if (tex->src[s].src_type != nir_tex_src_coord)
         continue;

      const unsigned num_coords = tex->coord_components;
      nir_ssa_def *coord = nir_ssa_for_src(b, tex->src[s].src, num_coords);

      /* Break the coordinate vector up into scalar channels. */
      nir_ssa_def *chan[4];
      for (unsigned c = 0; c < num_coords; c++)
         chan[c] = nir_channel(b, coord, c);

      /* For array textures the last component is the array index, which
       * is never clamped.
       */
      const unsigned num_clampable =
         tex->is_array ? num_coords - 1 : num_coords;

      for (unsigned c = 0; c < num_clampable; c++) {
         if (!((1 << c) & sat_mask))
            continue;

         if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT) {
            chan[c] = nir_fsat(b, chan[c]);
            continue;
         }

         /* Rect textures use non-normalized coordinates, so the valid
          * range is [0.0, texture size] instead of [0.0, 1.0].
          */
         nir_ssa_def *size = get_texture_size(b, tex);
         nir_ssa_def *zero = nir_imm_float(b, 0.0);
         nir_ssa_def *at_least_zero = nir_fmax(b, chan[c], zero);
         nir_ssa_def *upper = nir_channel(b, size, c);
         chan[c] = nir_fmin(b, at_least_zero, upper);
      }

      /* Reassemble the channels into one vecN and plug it into the
       * instruction.
       */
      nir_ssa_def *clamped = nir_vec(b, chan, num_coords);
      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[s].src,
                            nir_src_for_ssa(clamped));
   }
}
/**
 * Emit NIR computing fmin(fmax(x, min_val), max_val).
 *
 * \param b        builder used to emit the instructions
 * \param x        value to clamp
 * \param min_val  lower bound, applied first
 * \param max_val  upper bound, applied to the result of the lower bound
 * \return         SSA value holding the clamped result
 */
static inline nir_ssa_def *
build_fclamp(nir_builder *b,
             nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
   nir_ssa_def *raised = nir_fmax(b, x, min_val);

   return nir_fmin(b, raised, max_val);
}