void lp_build_alpha_test(struct gallivm_state *gallivm, unsigned func, struct lp_type type, const struct util_format_description *cbuf_format_desc, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref, boolean do_branch) { struct lp_build_context bld; LLVMValueRef test; lp_build_context_init(&bld, gallivm, type); /* * Alpha testing needs to be done in the color buffer precision. * * TODO: Ideally, instead of duplicating the color conversion code, we would do * alpha testing after converting the output colors, but that's not very * convenient, because it needs to be done before depth testing. Hopefully * LLVM will detect and remove the duplicate expression. * * FIXME: This should be generalized to formats other than rgba8 variants. */ if (type.floating && util_format_is_rgba8_variant(cbuf_format_desc)) { const unsigned dst_width = 8; alpha = lp_build_clamp(&bld, alpha, bld.zero, bld.one); ref = lp_build_clamp(&bld, ref, bld.zero, bld.one); alpha = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, alpha); ref = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, ref); type.floating = 0; lp_build_context_init(&bld, gallivm, type); } test = lp_build_cmp(&bld, func, alpha, ref); lp_build_name(test, "alpha_mask"); lp_build_mask_update(mask, test); if (do_branch) lp_build_mask_check(mask); }
/** * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer * mipmap level index. * Note: this is all scalar code. * \param lod scalar float texture level of detail * \param level_out returns integer */ void lp_build_nearest_mip_level(struct lp_build_sample_context *bld, unsigned unit, LLVMValueRef lod_ipart, LLVMValueRef *level_out) { struct lp_build_context *int_bld = &bld->int_bld; LLVMValueRef first_level, last_level, level; first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, unit); last_level = bld->dynamic_state->last_level(bld->dynamic_state, bld->gallivm, unit); /* convert float lod to integer */ level = lp_build_add(int_bld, lod_ipart, first_level); /* clamp level to legal range of levels */ *level_out = lp_build_clamp(int_bld, level, first_level, last_level); }
/** * Convert linear float soa values to packed srgb AoS values. * This only handles packed formats which are 4x8bit in size * (rgba and rgbx plus swizzles). * * @param src float SoA (vector) values to convert. */ LLVMValueRef lp_build_float_to_srgb_packed(struct gallivm_state *gallivm, const struct util_format_description *dst_fmt, struct lp_type src_type, LLVMValueRef *src) { LLVMBuilderRef builder = gallivm->builder; unsigned chan; struct lp_build_context f32_bld; struct lp_type int32_type = lp_int_type(src_type); LLVMValueRef tmpsrgb[4], alpha, dst; lp_build_context_init(&f32_bld, gallivm, src_type); /* rgb is subject to linear->srgb conversion, alpha is not */ for (chan = 0; chan < 3; chan++) { tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]); } /* * can't use lp_build_conv since we want to keep values as 32bit * here so we can interleave with rgb to go from SoA->AoS. */ alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one); alpha = lp_build_mul(&f32_bld, alpha, lp_build_const_vec(gallivm, src_type, 255.0f)); tmpsrgb[3] = lp_build_iround(&f32_bld, alpha); dst = lp_build_zero(gallivm, int32_type); for (chan = 0; chan < dst_fmt->nr_channels; chan++) { if (dst_fmt->swizzle[chan] <= UTIL_FORMAT_SWIZZLE_W) { unsigned ls; LLVMValueRef shifted, shift_val; ls = dst_fmt->channel[dst_fmt->swizzle[chan]].shift; shift_val = lp_build_const_int_vec(gallivm, int32_type, ls); shifted = LLVMBuildShl(builder, tmpsrgb[chan], shift_val, ""); dst = LLVMBuildOr(builder, dst, shifted, ""); } } return dst; }
static INLINE void yuv_to_rgb_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) { LLVMBuilderRef builder = gallivm->builder; struct lp_type type; struct lp_build_context bld; LLVMValueRef c0; LLVMValueRef c8; LLVMValueRef c16; LLVMValueRef c128; LLVMValueRef c255; LLVMValueRef cy; LLVMValueRef cug; LLVMValueRef cub; LLVMValueRef cvr; LLVMValueRef cvg; memset(&type, 0, sizeof type); type.sign = TRUE; type.width = 32; type.length = n; lp_build_context_init(&bld, gallivm, type); assert(lp_check_value(type, y)); assert(lp_check_value(type, u)); assert(lp_check_value(type, v)); /* * Constants */ c0 = lp_build_const_int_vec(gallivm, type, 0); c8 = lp_build_const_int_vec(gallivm, type, 8); c16 = lp_build_const_int_vec(gallivm, type, 16); c128 = lp_build_const_int_vec(gallivm, type, 128); c255 = lp_build_const_int_vec(gallivm, type, 255); cy = lp_build_const_int_vec(gallivm, type, 298); cug = lp_build_const_int_vec(gallivm, type, -100); cub = lp_build_const_int_vec(gallivm, type, 516); cvr = lp_build_const_int_vec(gallivm, type, 409); cvg = lp_build_const_int_vec(gallivm, type, -208); /* * y -= 16; * u -= 128; * v -= 128; */ y = LLVMBuildSub(builder, y, c16, ""); u = LLVMBuildSub(builder, u, c128, ""); v = LLVMBuildSub(builder, v, c128, ""); /* * r = 298 * _y + 409 * _v + 128; * g = 298 * _y - 100 * _u - 208 * _v + 128; * b = 298 * _y + 516 * _u + 128; */ y = LLVMBuildMul(builder, y, cy, ""); y = LLVMBuildAdd(builder, y, c128, ""); *r = LLVMBuildMul(builder, v, cvr, ""); *g = LLVMBuildAdd(builder, LLVMBuildMul(builder, u, cug, ""), LLVMBuildMul(builder, v, cvg, ""), ""); *b = LLVMBuildMul(builder, u, cub, ""); *r = LLVMBuildAdd(builder, *r, y, ""); *g = LLVMBuildAdd(builder, *g, y, ""); *b = LLVMBuildAdd(builder, *b, y, ""); /* * r >>= 8; * g >>= 8; * b >>= 8; */ *r = LLVMBuildAShr(builder, *r, c8, "r"); *g = LLVMBuildAShr(builder, *g, c8, "g"); *b = LLVMBuildAShr(builder, *b, c8, "b"); /* * Clamp */ *r = lp_build_clamp(&bld, *r, c0, c255); *g = lp_build_clamp(&bld, *g, c0, c255); *b = lp_build_clamp(&bld, *b, c0, c255); }
/** * Convert linear float values to srgb int values. * Several possibilities how to do this, e.g. * - use table (based on exponent/highest order mantissa bits) and do * linear interpolation (https://gist.github.com/rygorous/2203834) * - Chebyshev polynomial * - Approximation using reciprocals * - using int-to-float and float-to-int tricks for pow() * (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent) * * @param src float (vector) value(s) to convert. */ static LLVMValueRef lp_build_linear_to_srgb(struct gallivm_state *gallivm, struct lp_type src_type, LLVMValueRef src) { LLVMBuilderRef builder = gallivm->builder; struct lp_build_context f32_bld; LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final; lp_build_context_init(&f32_bld, gallivm, src_type); src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one); if (0) { /* * using int-to-float and float-to-int trick for pow(). * This is much more accurate than necessary thanks to the correction, * but it most certainly makes no sense without rsqrt available. * Bonus points if you understand how this works... * All in all (including min/max clamp, conversion) 19 instructions. */ float exp_f = 2.0f / 3.0f; /* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */ float exp2f_c = 1.30438178253e+19f; float coeff_f = 0.62996f; LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2; struct lp_type int_type = lp_int_type(src_type); /* * First calculate approx x^8/12 */ exponent = lp_build_const_vec(gallivm, src_type, exp_f); coeff = lp_build_const_vec(gallivm, src_type, exp2f_c * powf(coeff_f, 1.0f / exp_f)); /* premultiply src */ tmp = lp_build_mul(&f32_bld, coeff, src); /* "log2" */ tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), ""); tmp = lp_build_int_to_float(&f32_bld, tmp); /* multiply for pow */ tmp = lp_build_mul(&f32_bld, tmp, exponent); /* "exp2" */ pow_approx = lp_build_itrunc(&f32_bld, tmp); pow_approx = LLVMBuildBitCast(builder, pow_approx, lp_build_vec_type(gallivm, src_type), ""); /* * Since that pow was inaccurate (like 3 bits, though each sqrt step would * give another bit), compensate the error (which is why we chose another * exponent in the first place). */ /* x * x^(8/12) = x^(20/12) */ pow_1 = lp_build_mul(&f32_bld, pow_approx, src); /* x * x * x^(-4/12) = x^(20/12) */ /* Should avoid using rsqrt if it's not available, but * using x * x^(4/12) * x^(4/12) instead will change error weight */ tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx); x2 = lp_build_mul(&f32_bld, src, src); pow_2 = lp_build_mul(&f32_bld, x2, tmp); /* average the values so the errors cancel out, compensate bias, * we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul * for conversion to int in here */ tmp = lp_build_add(&f32_bld, pow_1, pow_2); coeff = lp_build_const_vec(gallivm, src_type, 1.0f / (3.0f * coeff_f) * 0.999852f * powf(1.055f * 255.0f, 4.0f)); pow_final = lp_build_mul(&f32_bld, tmp, coeff); /* x^(5/12) = rsqrt(rsqrt(x^20/12)) */ if (lp_build_fast_rsqrt_available(src_type)) { pow_final = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, pow_final)); } else { pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final)); } pow_final = lp_build_add(&f32_bld, pow_final, lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f)); } else { /* * using "rational polynomial" approximation here. * Essentially y = a*x^0.375 + b*x^0.5 + c, with also * factoring in the 255.0 mul and the scaling mul. * (a is closer to actual value so has higher weight than b.) * Note: the constants are magic values. They were found empirically, * possibly could be improved but good enough (be VERY careful with * error metric if you'd want to tweak them, they also MUST fit with * the crappy polynomial above for srgb->linear since it is required * that each srgb value maps back to the same value). * This function has an error of max +-0.17 (and we'd only require +-0.6), * for the approximated srgb->linear values the error is naturally larger * (+-0.42) but still accurate enough (required +-0.5 essentially). * All in all (including min/max clamp, conversion) 15 instructions. * FMA would help (minus 2 instructions). */ LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2; if (lp_build_fast_rsqrt_available(src_type)) { tmp = lp_build_fast_rsqrt(&f32_bld, src); x05 = lp_build_mul(&f32_bld, src, tmp); } else { /* * I don't really expect this to be practical without rsqrt * but there's no reason for triple punishment so at least * save the otherwise resulting division and unnecessary mul... */ x05 = lp_build_sqrt(&f32_bld, src); } tmp = lp_build_mul(&f32_bld, x05, src); if (lp_build_fast_rsqrt_available(src_type)) { x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp)); } else { x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp)); } a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f); b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f); c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f); tmp = lp_build_mul(&f32_bld, a_const, x0375); tmp2 = lp_build_mul(&f32_bld, b_const, x05); tmp2 = lp_build_add(&f32_bld, tmp2, c_const); pow_final = lp_build_add(&f32_bld, tmp, tmp2); } /* linear part is easy */ lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f); lin = lp_build_mul(&f32_bld, src, lin_const); lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f); is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh); tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final); f32_bld.type.sign = 0; return lp_build_iround(&f32_bld, tmp); }