void lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef *p_exp2_int_part, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; LLVMValueRef fpart = NULL; LLVMValueRef expipart = NULL; LLVMValueRef expfpart = NULL; LLVMValueRef res = NULL; if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); assert(type.floating && type.width == 32); x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); /* ipart = int(x - 0.5) */ ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); /* fpart = x - ipart */ fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, ""); fpart = LLVMBuildSub(bld->builder, x, fpart, ""); } if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); } if(p_exp2) { expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); } if(p_exp2_int_part) *p_exp2_int_part = expipart; if(p_frac_part) *p_frac_part = fpart; if(p_exp2) *p_exp2 = res; }
/** * Convert srgb int values to linear float values. * Several possibilities how to do this, e.g. * - table * - doing the pow() with int-to-float and float-to-int tricks * (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent) * - just using standard polynomial approximation * (3rd order polynomial is required for crappy but just sufficient accuracy) * * @param src integer (vector) value(s) to convert * (chan_bits bit values unpacked to 32 bit already). */ LLVMValueRef lp_build_srgb_to_linear(struct gallivm_state *gallivm, struct lp_type src_type, unsigned chan_bits, LLVMValueRef src) { struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32); struct lp_build_context f32_bld; LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh; double coeffs[4] = {0.0023f, 0.0030f / 255.0f, 0.6935f / (255.0f * 255.0f), 0.3012f / (255.0f * 255.0f * 255.0f) }; assert(src_type.width == 32); /* Technically this would work with more bits too but would be inaccurate. */ assert(chan_bits <= 8); lp_build_context_init(&f32_bld, gallivm, f32_type); /* * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023) * ( poly = 0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023) * (found with octave polyfit and some magic as I couldn't get the error * function right). Using the above mentioned error function, the values stay * within +-0.35, except for the lowest values - hence tweaking linear segment * to cover the first 16 instead of the first 11 values (the error stays * just about acceptable there too). * Hence: lin = src > 15 ? poly : src / 12.6 * This function really only makes sense for vectors, should use LUT otherwise. * All in all (including float conversion) 11 instructions (with sse4.1), * 6 constants (polynomial could be done with 1 instruction less at the cost * of slightly worse dependency chain, fma should also help). */ /* doing the 1/255 mul as part of the approximation */ srcf = lp_build_int_to_float(&f32_bld, src); if (chan_bits != 8) { /* could adjust all the constants instead */ LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type, 255.0f / ((1 << chan_bits) - 1)); srcf = lp_build_mul(&f32_bld, srcf, rescale_const); } lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f)); part_lin = lp_build_mul(&f32_bld, srcf, lin_const); part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4); lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f); is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh); return lp_build_select(&f32_bld, is_linear, part_lin, part_pow); }
/** * See http://www.devmaster.net/forums/showthread.php?p=43580 */ void lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef *p_exp, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000); LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff); LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); LLVMValueRef i = NULL; LLVMValueRef exp = NULL; LLVMValueRef mant = NULL; LLVMValueRef logexp = NULL; LLVMValueRef logmant = NULL; LLVMValueRef res = NULL; if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); assert(type.floating && type.width == 32); i = LLVMBuildBitCast(bld->builder, x, int_vec_type, ""); /* exp = (float) exponent(x) */ exp = LLVMBuildAnd(bld->builder, i, expmask, ""); } if(p_floor_log2 || p_log2) { logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), ""); logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), ""); logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); } if(p_log2) { /* mant = (float) mantissa(x) */ mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); mant = LLVMBuildOr(bld->builder, mant, one, ""); mant = LLVMBuildBitCast(bld->builder, mant, vec_type, ""); logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, Elements(lp_build_log2_polynomial)); /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } if(p_exp) *p_exp = exp; if(p_floor_log2) *p_floor_log2 = logexp; if(p_log2) *p_log2 = res; }