/** * Build a manual selection sequence for cube face sc/tc coordinates and * major axis vector (multiplied by 2 for consistency) for the given * vec3 \p coords, for the face implied by \p selcoords. * * For the major axis, we always adjust the sign to be in the direction of * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards * the selcoords major axis. */ static void build_cube_select(LLVMBuilderRef builder, const struct cube_selection_coords *selcoords, const LLVMValueRef *coords, LLVMValueRef *out_st, LLVMValueRef *out_ma) { LLVMTypeRef f32 = LLVMTypeOf(coords[0]); LLVMValueRef is_ma_positive; LLVMValueRef sgn_ma; LLVMValueRef is_ma_z, is_not_ma_z; LLVMValueRef is_ma_y; LLVMValueRef is_ma_x; LLVMValueRef sgn; LLVMValueRef tmp; is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->ma, LLVMConstReal(f32, 0.0), ""); sgn_ma = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); /* Select sc */ tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], ""); sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), LLVMBuildSelect(builder, is_ma_x, sgn_ma, LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); /* Select tc */ tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""), LLVMConstReal(f32, -1.0), ""); out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); /* Select ma */ tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), ""); sgn = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), ""); *out_ma = LLVMBuildFMul(builder, tmp, sgn, ""); }
/* Perform front/back face culling and return true if the primitive is accepted. */ static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], struct ac_position_w_info *w, bool cull_front, bool cull_back, bool cull_zero_area) { LLVMBuilderRef builder = ctx->builder; if (cull_front && cull_back) return ctx->i1false; if (!cull_front && !cull_back && !cull_zero_area) return ctx->i1true; /* Front/back face culling. Also if the determinant == 0, the triangle * area is 0. */ LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], ""); LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], ""); LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], ""); LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], ""); LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, ""); LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, ""); LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, ""); /* Negative W negates the determinant. */ det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, ""); LLVMValueRef accepted = NULL; if (cull_front) { LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE; accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); } else if (cull_back) { LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE; accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); } else if (cull_zero_area) { accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, ""); } return accepted; }
/** * Inverse of lp_build_clamped_float_to_unsigned_norm above. * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1] * return {float, float, float, float} with values in range [0, 1]. */ LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, struct lp_type dst_type, LLVMValueRef src) { LLVMTypeRef vec_type = lp_build_vec_type(dst_type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type); LLVMValueRef bias_; LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; assert(dst_type.floating); mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)ubound/mask; bias = (double)((unsigned long long)1 << (mantissa - n)); res = src; if(src_width > mantissa) { int shift = src_width - mantissa; res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), ""); } bias_ = lp_build_const_vec(dst_type, bias); res = LLVMBuildOr(builder, res, LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildFSub(builder, res, bias_, ""); res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); return res; }
static LLVMValueRef translateFloatBinOp(NodeKind Op, LLVMValueRef ValueE1, LLVMValueRef ValueE2) { switch (Op) { case SumOp: return LLVMBuildFAdd(Builder, ValueE1, ValueE2, ""); case SubOp: return LLVMBuildFSub(Builder, ValueE1, ValueE2, ""); case MultOp: return LLVMBuildFMul(Builder, ValueE1, ValueE2, ""); case DivOp: return LLVMBuildFDiv(Builder, ValueE1, ValueE2, ""); case LtOp: return LLVMBuildFCmp(Builder, LLVMRealOLT, ValueE1, ValueE2, ""); case LeOp: return LLVMBuildFCmp(Builder, LLVMRealOLE, ValueE1, ValueE2, ""); case GtOp: return LLVMBuildFCmp(Builder, LLVMRealOGT, ValueE1, ValueE2, ""); case GeOp: return LLVMBuildFCmp(Builder, LLVMRealOGE, ValueE1, ValueE2, ""); case EqOp: return LLVMBuildFCmp(Builder, LLVMRealOEQ, ValueE1, ValueE2, ""); case DiffOp: return LLVMBuildFCmp(Builder, LLVMRealONE, ValueE1, ValueE2, ""); default: return NULL; } }
/** * Converts int16 half-float to float32 * Note this can be performed in 1 instruction if vcvtph2ps exists (sse5 i think?) * [llvm.x86.vcvtph2ps / _mm_cvtph_ps] * * @param src_type <vector> type of int16 * @param src value to convert * * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ */ LLVMValueRef lp_build_half_to_float(struct gallivm_state *gallivm, struct lp_type src_type, LLVMValueRef src) { struct lp_type f32_type = lp_type_float_vec(32, 32 * src_type.length); struct lp_type i32_type = lp_type_int_vec(32, 32 * src_type.length); LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type); LLVMTypeRef float_vec_type = lp_build_vec_type(gallivm, f32_type); /* Constants */ LLVMValueRef i32_13 = lp_build_const_int_vec(gallivm, i32_type, 13); LLVMValueRef i32_16 = lp_build_const_int_vec(gallivm, i32_type, 16); LLVMValueRef i32_mask_nosign = lp_build_const_int_vec(gallivm, i32_type, 0x7fff); LLVMValueRef i32_was_infnan = lp_build_const_int_vec(gallivm, i32_type, 0x7bff); LLVMValueRef i32_exp_infnan = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23); LLVMValueRef f32_magic = LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, i32_type, (254 - 15) << 23), float_vec_type, ""); /* Convert int16 vector to int32 vector by zero ext */ LLVMValueRef h = LLVMBuildZExt(builder, src, int_vec_type, ""); /* Exponent / mantissa bits */ LLVMValueRef expmant = LLVMBuildAnd(builder, i32_mask_nosign, h, ""); LLVMValueRef shifted = LLVMBuildBitCast(builder, LLVMBuildShl(builder, expmant, i32_13, ""), float_vec_type, ""); /* Exponent adjust */ LLVMValueRef scaled = LLVMBuildBitCast(builder, LLVMBuildFMul(builder, shifted, f32_magic, ""), int_vec_type, ""); /* Make sure Inf/NaN survive */ LLVMValueRef b_wasinfnan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, expmant, i32_was_infnan); LLVMValueRef infnanexp = LLVMBuildAnd(builder, b_wasinfnan, i32_exp_infnan, ""); /* Sign bit */ LLVMValueRef justsign = LLVMBuildXor(builder, h, expmant, ""); LLVMValueRef sign = LLVMBuildShl(builder, justsign, i32_16, ""); /* Combine result */ LLVMValueRef sign_inf = LLVMBuildOr(builder, sign, infnanexp, ""); LLVMValueRef final = LLVMBuildOr(builder, scaled, sign_inf, ""); /* Cast from int32 vector to float32 vector */ return LLVMBuildBitCast(builder, final, float_vec_type, ""); }
/** * Unpack several pixels in SoA. * * It takes a vector of packed pixels: * * packed = {P0, P1, P2, P3, ..., Pn} * * And will produce four vectors: * * red = {R0, R1, R2, R3, ..., Rn} * green = {G0, G1, G2, G3, ..., Gn} * blue = {B0, B1, B2, B3, ..., Bn} * alpha = {A0, A1, A2, A3, ..., An} * * It requires that a packed pixel fits into an element of the output * channels. The common case is when converting pixel with a depth of 32 bit or * less into floats. * * \param format_desc the format of the 'packed' incoming pixel vector * \param type the desired type for rgba_out (type.length = n, above) * \param packed the incoming vector of packed pixels * \param rgba_out returns the SoA R,G,B,A vectors */ void lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef packed, LLVMValueRef rgba_out[4]) { LLVMBuilderRef builder = gallivm->builder; struct lp_build_context bld; LLVMValueRef inputs[4]; unsigned chan; assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); assert(format_desc->block.bits <= type.width); /* FIXME: Support more output types */ assert(type.width == 32); lp_build_context_init(&bld, gallivm, type); /* Decode the input vector components */ for (chan = 0; chan < format_desc->nr_channels; ++chan) { const unsigned width = format_desc->channel[chan].size; const unsigned start = format_desc->channel[chan].shift; const unsigned stop = start + width; LLVMValueRef input; input = packed; switch(format_desc->channel[chan].type) { case UTIL_FORMAT_TYPE_VOID: input = lp_build_undef(gallivm, type); break; case UTIL_FORMAT_TYPE_UNSIGNED: /* * Align the LSB */ if (start) { input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); } /* * Zero the MSBs */ if (stop < format_desc->block.bits) { unsigned mask = ((unsigned long long)1 << width) - 1; input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), ""); } /* * Type conversion */ if (type.floating) { if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { assert(width == 8); if (format_desc->swizzle[3] == chan) { input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); } else { struct lp_type conv_type = lp_uint_type(type); input = lp_build_srgb_to_linear(gallivm, conv_type, input); } } else { if(format_desc->channel[chan].normalized) input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); else input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); } } else if (format_desc->channel[chan].pure_integer) { /* Nothing to do */ } else { /* FIXME */ assert(0); } break; case UTIL_FORMAT_TYPE_SIGNED: /* * Align the sign bit first. */ if (stop < type.width) { unsigned bits = type.width - stop; LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildShl(builder, input, bits_val, ""); } /* * Align the LSB (with an arithmetic shift to preserve the sign) */ if (format_desc->channel[chan].size < type.width) { unsigned bits = type.width - format_desc->channel[chan].size; LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildAShr(builder, input, bits_val, ""); } /* * Type conversion */ if (type.floating) { input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); if (format_desc->channel[chan].normalized) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildFMul(builder, input, scale_val, ""); /* the formula above will produce value below -1.0 for most negative * value but everything seems happy with that hence disable for now */ if (0) input = lp_build_max(&bld, input, lp_build_const_vec(gallivm, type, -1.0f)); } } else if (format_desc->channel[chan].pure_integer) { /* Nothing to do */ } else { /* FIXME */ assert(0); } break; case UTIL_FORMAT_TYPE_FLOAT: if (type.floating) { assert(start == 0); assert(stop == 32); assert(type.width == 32); input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), ""); } else { /* FIXME */ assert(0); input = lp_build_undef(gallivm, type); } break; case UTIL_FORMAT_TYPE_FIXED: if (type.floating) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); input = LLVMBuildFMul(builder, input, scale_val, ""); } else { /* FIXME */ assert(0); input = lp_build_undef(gallivm, type); } break; default: assert(0); input = lp_build_undef(gallivm, type); break; } inputs[chan] = input; } lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out); }
/** * Generic type conversion. * * TODO: Take a precision argument, or even better, add a new precision member * to the lp_type union. */ void lp_build_conv(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { LLVMBuilderRef builder = gallivm->builder; struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; /* We must not loose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); assert(num_srcs <= LP_MAX_VECTOR_LENGTH); assert(num_dsts <= LP_MAX_VECTOR_LENGTH); tmp_type = src_type; for(i = 0; i < num_srcs; ++i) { assert(lp_check_value(src_type, src[i])); tmp[i] = src[i]; } num_tmps = num_srcs; /* Special case 4x4f --> 1x16ub */ if (src_type.floating == 1 && src_type.fixed == 0 && src_type.sign == 1 && src_type.norm == 0 && src_type.width == 32 && src_type.length == 4 && dst_type.floating == 0 && dst_type.fixed == 0 && dst_type.sign == 0 && dst_type.norm == 1 && dst_type.width == 8 && dst_type.length == 16 && 4 * num_dsts == num_srcs && util_cpu_caps.has_sse2) { struct lp_build_context bld; struct lp_type int16_type = dst_type; struct lp_type int32_type = dst_type; LLVMValueRef const_255f; unsigned i, j; lp_build_context_init(&bld, gallivm, src_type); int16_type.width *= 2; int16_type.length /= 2; int16_type.sign = 1; int32_type.width *= 4; int32_type.length /= 4; int32_type.sign = 1; const_255f = lp_build_const_vec(gallivm, src_type, 255.0f); for (i = 0; i < num_dsts; ++i, src += 4) { LLVMValueRef lo, hi; for (j = 0; j < 4; ++j) { tmp[j] = LLVMBuildFMul(builder, src[j], const_255f, ""); tmp[j] = lp_build_iround(&bld, tmp[j]); } /* relying on clamping behavior of sse2 intrinsics here */ lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]); hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]); dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi); } return; } /* Special case 2x8f --> 1x16ub */ else if (src_type.floating == 1 && src_type.fixed == 0 && src_type.sign == 1 && src_type.norm == 0 && src_type.width == 32 && src_type.length == 8 && dst_type.floating == 0 && dst_type.fixed == 0 && dst_type.sign == 0 && dst_type.norm == 1 && dst_type.width == 8 && dst_type.length == 16 && 2 * num_dsts == num_srcs && util_cpu_caps.has_avx) { struct lp_build_context bld; struct lp_type int16_type = dst_type; struct lp_type int32_type = dst_type; LLVMValueRef const_255f; unsigned i; lp_build_context_init(&bld, gallivm, src_type); int16_type.width *= 2; int16_type.length /= 2; int16_type.sign = 1; int32_type.width *= 4; int32_type.length /= 4; int32_type.sign = 1; const_255f = lp_build_const_vec(gallivm, src_type, 255.0f); for (i = 0; i < num_dsts; ++i, src += 2) { LLVMValueRef lo, hi, a, b; a = LLVMBuildFMul(builder, src[0], const_255f, ""); b = LLVMBuildFMul(builder, src[1], const_255f, ""); a = lp_build_iround(&bld, a); b = lp_build_iround(&bld, b); tmp[0] = lp_build_extract_range(gallivm, a, 0, 4); tmp[1] = lp_build_extract_range(gallivm, a, 4, 4); tmp[2] = lp_build_extract_range(gallivm, b, 0, 4); tmp[3] = lp_build_extract_range(gallivm, b, 4, 4); /* relying on clamping behavior of sse2 intrinsics here */ lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]); hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]); dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi); } return; } /* Pre convert half-floats to floats */ else if (src_type.floating && src_type.width == 16) { for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_half_to_float(gallivm, src_type, tmp[i]); tmp_type.width = 32; } /* * Clamp if necessary */ if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); double src_max = lp_const_max(src_type); double dst_max = lp_const_max(dst_type); LLVMValueRef thres; lp_build_context_init(&bld, gallivm, tmp_type); if(src_min < dst_min) { if(dst_min == 0.0) thres = bld.zero; else thres = lp_build_const_vec(gallivm, src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } if(src_max > dst_max) { if(dst_max == 1.0) thres = bld.one; else thres = lp_build_const_vec(gallivm, src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } } /* * Scale to the narrowest range */ if(dst_type.floating) { /* Nothing to do */ } else if(tmp_type.floating) { if(!dst_type.fixed && !dst_type.sign && dst_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_clamped_float_to_unsigned_norm(gallivm, tmp_type, dst_type.width, tmp[i]); } tmp_type.floating = FALSE; } else { double dst_scale = lp_const_scale(dst_type); LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ tmp_type.floating = FALSE; tmp_vec_type = lp_build_vec_type(gallivm, tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); #endif } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); unsigned src_offset = lp_const_offset(src_type); unsigned dst_offset = lp_const_offset(dst_type); /* Compensate for different offsets */ if (dst_offset > src_offset && src_type.width > dst_type.width) { for (i = 0; i < num_tmps; ++i) { LLVMValueRef shifted; LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1); if(src_type.sign) shifted = LLVMBuildAShr(builder, tmp[i], shift, ""); else shifted = LLVMBuildLShr(builder, tmp[i], shift, ""); tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, ""); } } if(src_shift > dst_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); else tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, ""); } } /* * Truncate or expand bit width * * No data conversion should happen here, although the sign bits are * crucial to avoid bad clamping. */ { struct lp_type new_type; new_type = tmp_type; new_type.sign = dst_type.sign; new_type.width = dst_type.width; new_type.length = dst_type.length; lp_build_resize(gallivm, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts); tmp_type = new_type; num_tmps = num_dsts; } /* * Scale to the widest range */ if(src_type.floating) { /* Nothing to do */ } else if(!src_type.floating && dst_type.floating) { if(!src_type.fixed && !src_type.sign && src_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_unsigned_norm_to_float(gallivm, src_type.width, dst_type, tmp[i]); } tmp_type.floating = TRUE; } else { double src_scale = lp_const_scale(src_type); LLVMTypeRef tmp_vec_type; /* Use an equally sized integer for intermediate computations */ tmp_type.floating = TRUE; tmp_type.sign = TRUE; tmp_vec_type = lp_build_vec_type(gallivm, tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */ tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); #endif } if (src_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); unsigned src_offset = lp_const_offset(src_type); unsigned dst_offset = lp_const_offset(dst_type); if (src_shift < dst_shift) { LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift); for (i = 0; i < num_tmps; ++i) { pre_shift[i] = tmp[i]; tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } /* Compensate for different offsets */ if (dst_offset > src_offset) { for (i = 0; i < num_tmps; ++i) { tmp[i] = LLVMBuildSub(builder, tmp[i], pre_shift[i], ""); } } } } for(i = 0; i < num_dsts; ++i) { dst[i] = tmp[i]; assert(lp_check_value(dst_type, dst[i])); } }
/** * Inverse of lp_build_clamped_float_to_unsigned_norm above. * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1] * return {float, float, float, float} with values in range [0, 1]. */ LLVMValueRef lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm, unsigned src_width, struct lp_type dst_type, LLVMValueRef src) { LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef vec_type = lp_build_vec_type(gallivm, dst_type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, dst_type); LLVMValueRef bias_; LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; assert(dst_type.floating); mantissa = lp_mantissa(dst_type); if (src_width <= (mantissa + 1)) { /* * The source width matches fits what can be represented in floating * point (i.e., mantissa + 1 bits). So do a straight multiplication * followed by casting. No further rounding is necessary. */ scale = 1.0/(double)((1ULL << src_width) - 1); res = LLVMBuildSIToFP(builder, src, vec_type, ""); res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); return res; } else { /* * The source width exceeds what can be represented in floating * point. So truncate the incoming values. */ n = MIN2(mantissa, src_width); ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)ubound/mask; bias = (double)((unsigned long long)1 << (mantissa - n)); res = src; if (src_width > mantissa) { int shift = src_width - mantissa; res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(gallivm, dst_type, shift), ""); } bias_ = lp_build_const_vec(gallivm, dst_type, bias); res = LLVMBuildOr(builder, res, LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildFSub(builder, res, bias_, ""); res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); } return res; }
/** * Special case for converting clamped IEEE-754 floats to unsigned norms. * * The mathematical voodoo below may seem excessive but it is actually * paramount we do it this way for several reasons. First, there is no single * precision FP to unsigned integer conversion Intel SSE instruction. Second, * secondly, even if there was, since the FP's mantissa takes only a fraction * of register bits the typically scale and cast approach would require double * precision for accurate results, and therefore half the throughput * * Although the result values can be scaled to an arbitrary bit width specified * by dst_width, the actual result type will have the same width. * * Ex: src = { float, float, float, float } * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1]. */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm, struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, src_type); LLVMValueRef res; unsigned mantissa; assert(src_type.floating); assert(dst_width <= src_type.width); src_type.sign = FALSE; mantissa = lp_mantissa(src_type); if (dst_width <= mantissa) { /* * Apply magic coefficients that will make the desired result to appear * in the lowest significant bits of the mantissa, with correct rounding. * * This only works if the destination width fits in the mantissa. */ unsigned long long ubound; unsigned long long mask; double scale; double bias; ubound = (1ULL << dst_width); mask = ubound - 1; scale = (double)mask/ubound; bias = (double)(1ULL << (mantissa - dst_width)); res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), ""); res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm, src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(gallivm, src_type, mask), ""); } else if (dst_width == (mantissa + 1)) { /* * The destination width matches exactly what can be represented in * floating point (i.e., mantissa + 1 bits). So do a straight * multiplication followed by casting. No further rounding is necessary. */ double scale; scale = (double)((1ULL << dst_width) - 1); res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), ""); res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); } else { /* * The destination exceeds what can be represented in the floating point. * So multiply by the largest power two we get away with, and when * subtract the most significant bit to rescale to normalized values. * * The largest power of two factor we can get away is * (1 << (src_type.width - 1)), because we need to use signed . In theory it * should be (1 << (src_type.width - 2)), but IEEE 754 rules states * INT_MIN should be returned in FPToSI, which is the correct result for * values near 1.0! * * This means we get (src_type.width - 1) correct bits for values near 0.0, * and (mantissa + 1) correct bits for values near 1.0. Equally or more * important, we also get exact results for 0.0 and 1.0. */ unsigned n = MIN2(src_type.width - 1, dst_width); double scale = (double)(1ULL << n); unsigned lshift = dst_width - n; unsigned rshift = n; LLVMValueRef lshifted; LLVMValueRef rshifted; res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), ""); res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); /* * Align the most significant bit to its final place. * * This will cause 1.0 to overflow to 0, but the later adjustment will * get it right. */ if (lshift) { lshifted = LLVMBuildShl(builder, res, lp_build_const_int_vec(gallivm, src_type, lshift), ""); } else { lshifted = res; } /* * Align the most significant bit to the right. */ rshifted = LLVMBuildLShr(builder, res, lp_build_const_int_vec(gallivm, src_type, rshift), ""); /* * Subtract the MSB to the LSB, therefore re-scaling from * (1 << dst_width) to ((1 << dst_width) - 1). */ res = LLVMBuildSub(builder, lshifted, rshifted, ""); } return res; }
/** * Initialize the bld->a, dadq fields. This involves fetching * those values from the arrays which are passed into the JIT function. */ static void coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr) { struct lp_build_context *coeff_bld = &bld->coeff_bld; struct lp_build_context *setup_bld = &bld->setup_bld; struct gallivm_state *gallivm = coeff_bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef pixoffx, pixoffy; unsigned attrib; unsigned chan; unsigned i; pixoffx = coeff_bld->undef; pixoffy = coeff_bld->undef; for (i = 0; i < coeff_bld->type.length; i++) { LLVMValueRef nr = lp_build_const_int32(gallivm, i); LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]); LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]); pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, ""); pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, ""); } for (attrib = 0; attrib < bld->num_attribs; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; LLVMValueRef index = lp_build_const_int32(gallivm, attrib * TGSI_NUM_CHANNELS); LLVMValueRef ptr; LLVMValueRef dadxaos = setup_bld->zero; LLVMValueRef dadyaos = setup_bld->zero; LLVMValueRef a0aos = setup_bld->zero; /* always fetch all 4 values for performance/simplicity */ switch (interp) { case LP_INTERP_PERSPECTIVE: /* fall-through */ case LP_INTERP_LINEAR: ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), ""); dadxaos = LLVMBuildLoad(builder, ptr, ""); ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, ""); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), ""); dadyaos = LLVMBuildLoad(builder, ptr, ""); attrib_name(dadxaos, attrib, 0, ".dadxaos"); attrib_name(dadyaos, attrib, 0, ".dadyaos"); /* fall-through */ case LP_INTERP_CONSTANT: case LP_INTERP_FACING: ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, ""); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), ""); a0aos = LLVMBuildLoad(builder, ptr, ""); attrib_name(a0aos, attrib, 0, ".a0aos"); break; case LP_INTERP_POSITION: /* Nothing to do as the position coeffs are already setup in slot 0 */ continue; default: assert(0); break; } /* * a = a0 + (x * dadx + y * dady) * a0aos is the attrib value at top left corner of stamp */ if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x); LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y); a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos); a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos); } /* * dadq = {0, dadx, dady, dadx + dady} * for two quads (side by side) this is: * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady} */ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { /* this generates a CRAPLOAD of shuffles... */ if (mask & (1 << chan)) { LLVMValueRef dadx, dady; LLVMValueRef dadq, dadq2; LLVMValueRef a; LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan); if (attrib == 0 && chan == 0) { a = bld->x; if (bld->pos_offset) { a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), ""); } a = lp_build_broadcast_scalar(coeff_bld, a); dadx = coeff_bld->one; dady = coeff_bld->zero; } else if (attrib == 0 && chan == 1) { a = bld->y; if (bld->pos_offset) { a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), ""); } a = lp_build_broadcast_scalar(coeff_bld, a); dady = coeff_bld->one; dadx = coeff_bld->zero; } else { dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, dadxaos, chan_index); dady = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, dadyaos, chan_index); /* * a = {a, a, a, a} */ a = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, a0aos, chan_index); } dadx = LLVMBuildFMul(builder, dadx, pixoffx, ""); dady = LLVMBuildFMul(builder, dady, pixoffy, ""); dadq = LLVMBuildFAdd(builder, dadx, dady, ""); /* * Compute the attrib values on the upper-left corner of each * group of quads. * Note that if we process 2 quads at once this doesn't * really exactly to what we want. * We need to access elem 0 and 2 respectively later if we process * 2 quads at once. */ if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); a = LLVMBuildFAdd(builder, a, dadq2, ""); } #if PERSPECTIVE_DIVIDE_PER_QUAD /* * a *= 1 / w */ /* * XXX since we're only going to access elements 0,2 out of 8 * if we have 8-wide vectors we should do the division only 4-wide. * a is really a 2-elements in a 4-wide vector disguised as 8-wide * in this case. */ if (interp == LP_INTERP_PERSPECTIVE) { LLVMValueRef w = bld->a[0][3]; assert(attrib != 0); assert(bld->mask[0] & TGSI_WRITEMASK_W); if (!bld->oow) { bld->oow = lp_build_rcp(coeff_bld, w); lp_build_name(bld->oow, "oow"); } a = lp_build_mul(coeff_bld, a, bld->oow); } #endif attrib_name(a, attrib, chan, ".a"); attrib_name(dadq, attrib, chan, ".dadq"); bld->a[attrib][chan] = lp_build_alloca(gallivm, LLVMTypeOf(a), ""); LLVMBuildStore(builder, a, bld->a[attrib][chan]); bld->dadq[attrib][chan] = dadq; } } } }
/** * Sample the texture/mipmap using given image filter and mip filter. * data0_ptr and data1_ptr point to the two mipmap levels to sample * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. * If we're using nearest miplevel sampling the '1' values will be null/unused. */ static void lp_build_sample_mipmap(struct lp_build_sample_context *bld, unsigned img_filter, unsigned mip_filter, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, LLVMValueRef ilevel0, LLVMValueRef ilevel1, LLVMValueRef lod_fpart, LLVMValueRef colors_lo_var, LLVMValueRef colors_hi_var) { LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef size0; LLVMValueRef size1; LLVMValueRef row_stride0_vec; LLVMValueRef row_stride1_vec; LLVMValueRef img_stride0_vec; LLVMValueRef img_stride1_vec; LLVMValueRef data_ptr0; LLVMValueRef data_ptr1; LLVMValueRef colors0_lo, colors0_hi; LLVMValueRef colors1_lo, colors1_hi; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, &size0, &row_stride0_vec, &img_stride0_vec); data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); lp_build_sample_image_linear(bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); } /* Store the first level's colors in the output variables */ LLVMBuildStore(builder, colors0_lo, colors_lo_var); LLVMBuildStore(builder, colors0_hi, colors_hi_var); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0); LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32); struct lp_build_if_state if_ctx; LLVMValueRef need_lerp; lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, ""); lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16"); /* need_lerp = lod_fpart > 0 */ need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, lod_fpart, LLVMConstNull(i32_type), "need_lerp"); lp_build_if(&if_ctx, bld->gallivm, need_lerp); { struct lp_build_context h16_bld; lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16)); /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, &size1, &row_stride1_vec, &img_stride1_vec); data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, &colors1_lo, &colors1_hi); } else { lp_build_sample_image_linear(bld, size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, &colors1_lo, &colors1_hi); } /* interpolate samples from the two mipmap levels */ lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); #if HAVE_LLVM == 0x208 /* This is a work-around for a bug in LLVM 2.8. * Evidently, something goes wrong in the construction of the * lod_fpart short[8] vector. Adding this no-effect shuffle seems * to force the vector to be properly constructed. * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). */ { LLVMValueRef shuffles[8], shuffle; int i; assert(h16_bld.type.length <= Elements(shuffles)); for (i = 0; i < h16_bld.type.length; i++) shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1)); shuffle = LLVMConstVector(shuffles, h16_bld.type.length); lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, lod_fpart, shuffle, ""); } #endif colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, colors0_lo, colors1_lo); colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, colors0_hi, colors1_hi); LLVMBuildStore(builder, colors0_lo, colors_lo_var); LLVMBuildStore(builder, colors0_hi, colors_hi_var); } lp_build_endif(&if_ctx); } }
/** * Special case for converting clamped IEEE-754 floats to unsigned norms. * * The mathematical voodoo below may seem excessive but it is actually * paramount we do it this way for several reasons. First, there is no single * precision FP to unsigned integer conversion Intel SSE instruction. Second, * secondly, even if there was, since the FP's mantissa takes only a fraction * of register bits the typically scale and cast approach would require double * precision for accurate results, and therefore half the throughput * * Although the result values can be scaled to an arbitrary bit width specified * by dst_width, the actual result type will have the same width. * * Ex: src = { float, float, float, float } * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1]. */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; assert(src_type.floating); mantissa = lp_mantissa(src_type); /* We cannot carry more bits than the mantissa */ n = MIN2(mantissa, dst_width); /* This magic coefficients will make the desired result to appear in the * lowest significant bits of the mantissa. */ ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { int shift = dst_width - n; res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ /* YES: this fixes progs/trivial/tri-z-eq.c. * Otherwise vertex Z=1.0 values get converted to something like * 0xfffffb00 and the test for equality with 0xffffffff fails. */ #if 0 { LLVMValueRef msb; msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), ""); msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), ""); msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), ""); res = LLVMBuildOr(builder, res, msb, ""); } #elif 0 while(shift > 0) { res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), ""); shift -= n; n *= 2; } #endif } else res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); return res; }
/** * Generic type conversion. * * TODO: Take a precision argument, or even better, add a new precision member * to the lp_type union. */ void lp_build_conv(LLVMBuilderRef builder, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; /* We must not loose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); assert(num_srcs <= LP_MAX_VECTOR_LENGTH); assert(num_dsts <= LP_MAX_VECTOR_LENGTH); tmp_type = src_type; for(i = 0; i < num_srcs; ++i) { assert(lp_check_value(src_type, src[i])); tmp[i] = src[i]; } num_tmps = num_srcs; /* * Clamp if necessary */ if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); double src_max = lp_const_max(src_type); double dst_max = lp_const_max(dst_type); LLVMValueRef thres; lp_build_context_init(&bld, builder, tmp_type); if(src_min < dst_min) { if(dst_min == 0.0) thres = bld.zero; else thres = lp_build_const_vec(src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } if(src_max > dst_max) { if(dst_max == 1.0) thres = bld.one; else thres = lp_build_const_vec(src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } } /* * Scale to the narrowest range */ if(dst_type.floating) { /* Nothing to do */ } else if(tmp_type.floating) { if(!dst_type.fixed && !dst_type.sign && dst_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder, tmp_type, dst_type.width, tmp[i]); } tmp_type.floating = FALSE; } else { double dst_scale = lp_const_scale(dst_type); LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ tmp_type.floating = FALSE; tmp_vec_type = lp_build_vec_type(tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); #endif } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); /* FIXME: compensate different offsets too */ if(src_shift > dst_shift) { LLVMValueRef shift = lp_build_const_int_vec(tmp_type, src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); else tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, ""); } } /* * Truncate or expand bit width * * No data conversion should happen here, although the sign bits are * crucial to avoid bad clamping. */ { struct lp_type new_type; new_type = tmp_type; new_type.sign = dst_type.sign; new_type.width = dst_type.width; new_type.length = dst_type.length; lp_build_resize(builder, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts); tmp_type = new_type; num_tmps = num_dsts; } /* * Scale to the widest range */ if(src_type.floating) { /* Nothing to do */ } else if(!src_type.floating && dst_type.floating) { if(!src_type.fixed && !src_type.sign && src_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_unsigned_norm_to_float(builder, src_type.width, dst_type, tmp[i]); } tmp_type.floating = TRUE; } else { double src_scale = lp_const_scale(src_type); LLVMTypeRef tmp_vec_type; /* Use an equally sized integer for intermediate computations */ tmp_type.floating = TRUE; tmp_type.sign = TRUE; tmp_vec_type = lp_build_vec_type(tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */ tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); #endif } if (src_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); /* FIXME: compensate different offsets too */ if(src_shift < dst_shift) { LLVMValueRef shift = lp_build_const_int_vec(tmp_type, dst_shift - src_shift); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } } for(i = 0; i < num_dsts; ++i) { dst[i] = tmp[i]; assert(lp_check_value(dst_type, dst[i])); } }
void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg) { LLVMBuilderRef builder = ctx->builder; struct cube_selection_coords selcoords; LLVMValueRef coords[3]; LLVMValueRef invma; build_cube_intrinsic(ctx, coords_arg, &selcoords); invma = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); for (int i = 0; i < 2; ++i) coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); coords[2] = selcoords.id; if (is_deriv && derivs_arg) { LLVMValueRef derivs[4]; int axis; /* Convert cube derivatives to 2D derivatives. */ for (axis = 0; axis < 2; axis++) { LLVMValueRef deriv_st[2]; LLVMValueRef deriv_ma; /* Transform the derivative alongside the texture * coordinate. Mathematically, the correct formula is * as follows. Assume we're projecting onto the +Z face * and denote by dx/dh the derivative of the (original) * X texture coordinate with respect to horizontal * window coordinates. The projection onto the +Z face * plane is: * * f(x,z) = x/z * * Then df/dh = df/dx * dx/dh + df/dz * dz/dh * = 1/z * dx/dh - x/z * 1/z * dz/dh. * * This motivatives the implementation below. * * Whether this actually gives the expected results for * apps that might feed in derivatives obtained via * finite differences is anyone's guess. The OpenGL spec * seems awfully quiet about how textureGrad for cube * maps should be handled. */ build_cube_select(builder, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma); deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); for (int i = 0; i < 2; ++i) derivs[axis * 2 + i] = LLVMBuildFSub(builder, LLVMBuildFMul(builder, deriv_st[i], invma, ""), LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); } memcpy(derivs_arg, derivs, sizeof(derivs)); } /* Shift the texture coordinate. This must be applied after the * derivative calculation. */ for (int i = 0; i < 2; ++i) coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); if (is_array) { /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ /* coords_arg.w component - array_index for cube arrays */ LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), ""); coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); } memcpy(coords_arg, coords, sizeof(coords)); }
/** * Unpack a single pixel into its RGBA components. * * @param desc the pixel format for the packed pixel value * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM * * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. */ static INLINE LLVMValueRef lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef packed) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; LLVMValueRef masks[4]; LLVMValueRef scales[4]; boolean normalized; boolean needs_uitofp; unsigned shift; unsigned i; /* TODO: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); assert(desc->block.bits <= 32); /* Do the intermediate integer computations with 32bit integers since it * matches floating point size */ assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context)); /* Broadcast the packed value to all four channels * before: packed = BGRA * after: packed = {BGRA, BGRA, BGRA, BGRA} */ packed = LLVMBuildInsertElement(builder, LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), packed, LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)), ""); packed = LLVMBuildShuffleVector(builder, packed, LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), ""); /* Initialize vector constants */ normalized = FALSE; needs_uitofp = FALSE; shift = 0; /* Loop over 4 color components */ for (i = 0; i < 4; ++i) { unsigned bits = desc->channel[i].size; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context)); } else { unsigned long long mask = (1ULL << bits) - 1; assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); if (bits == 32) { needs_uitofp = TRUE; } shifts[i] = lp_build_const_int32(gallivm, shift); masks[i] = lp_build_const_int32(gallivm, mask); if (desc->channel[i].normalized) { scales[i] = lp_build_const_float(gallivm, 1.0 / mask); normalized = TRUE; } else scales[i] = lp_build_const_float(gallivm, 1.0); } shift += bits; } /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA} * into masked = {B, G, R, A} */ shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); if (!needs_uitofp) { /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); } else { casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); } /* At this point 'casted' may be a vector of floats such as * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized * we'll scale this to {1.0, 1.0, 1.0, 1.0}. */ if (normalized) scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); else scaled = casted; return scaled; }
/** * Pack a single pixel. * * @param rgba 4 float vector with the unpacked components. * * XXX: This is mostly for reference and testing -- operating a single pixel at * a time is rarely if ever needed. */ LLVMValueRef lp_build_pack_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef rgba) { LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; LLVMValueRef shifted, casted, scaled, unswizzled; LLVMValueRef shifts[4]; LLVMValueRef scales[4]; boolean normalized; unsigned shift; unsigned i, j; assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); type = LLVMIntTypeInContext(gallivm->context, desc->block.bits); /* Unswizzle the color components into the source vector. */ for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { if (desc->swizzle[j] == i) break; } if (j < 4) swizzles[i] = lp_build_const_int32(gallivm, j); else swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); } unswizzled = LLVMBuildShuffleVector(builder, rgba, LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)), LLVMConstVector(swizzles, 4), ""); normalized = FALSE; shift = 0; for (i = 0; i < 4; ++i) { unsigned bits = desc->channel[i].size; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context)); } else { unsigned mask = (1 << bits) - 1; assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(bits < 32); shifts[i] = lp_build_const_int32(gallivm, shift); if (desc->channel[i].normalized) { scales[i] = lp_build_const_float(gallivm, mask); normalized = TRUE; } else scales[i] = lp_build_const_float(gallivm, 1.0); } shift += bits; } if (normalized) scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); else scaled = unswizzled; casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), ""); shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); /* Bitwise or all components */ for (i = 0; i < 4; ++i) { if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, lp_build_const_int32(gallivm, i), ""); if (packed) packed = LLVMBuildOr(builder, packed, component, ""); else packed = component; } } if (!packed) packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); if (desc->block.bits < 32) packed = LLVMBuildTrunc(builder, packed, type, ""); return packed; }
static LLVMValueRef lp_build_extract_soa_chan(struct lp_build_context *bld, unsigned blockbits, boolean srgb_chan, struct util_format_channel_description chan_desc, LLVMValueRef packed) { struct gallivm_state *gallivm = bld->gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_type type = bld->type; LLVMValueRef input = packed; const unsigned width = chan_desc.size; const unsigned start = chan_desc.shift; const unsigned stop = start + width; /* Decode the input vector component */ switch(chan_desc.type) { case UTIL_FORMAT_TYPE_VOID: input = bld->undef; break; case UTIL_FORMAT_TYPE_UNSIGNED: /* * Align the LSB */ if (start) { input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); } /* * Zero the MSBs */ if (stop < blockbits) { unsigned mask = ((unsigned long long)1 << width) - 1; input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), ""); } /* * Type conversion */ if (type.floating) { if (srgb_chan) { struct lp_type conv_type = lp_uint_type(type); input = lp_build_srgb_to_linear(gallivm, conv_type, width, input); } else { if(chan_desc.normalized) input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); else input = LLVMBuildSIToFP(builder, input, bld->vec_type, ""); } } else if (chan_desc.pure_integer) { /* Nothing to do */ } else { /* FIXME */ assert(0); } break; case UTIL_FORMAT_TYPE_SIGNED: /* * Align the sign bit first. */ if (stop < type.width) { unsigned bits = type.width - stop; LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildShl(builder, input, bits_val, ""); } /* * Align the LSB (with an arithmetic shift to preserve the sign) */ if (chan_desc.size < type.width) { unsigned bits = type.width - chan_desc.size; LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildAShr(builder, input, bits_val, ""); } /* * Type conversion */ if (type.floating) { input = LLVMBuildSIToFP(builder, input, bld->vec_type, ""); if (chan_desc.normalized) { double scale = 1.0 / ((1 << (chan_desc.size - 1)) - 1); LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildFMul(builder, input, scale_val, ""); /* * The formula above will produce value below -1.0 for most negative * value but everything seems happy with that hence disable for now. */ if (0) input = lp_build_max(bld, input, lp_build_const_vec(gallivm, type, -1.0f)); } } else if (chan_desc.pure_integer) { /* Nothing to do */ } else { /* FIXME */ assert(0); } break; case UTIL_FORMAT_TYPE_FLOAT: if (type.floating) { if (chan_desc.size == 16) { struct lp_type f16i_type = type; f16i_type.width /= 2; f16i_type.floating = 0; if (start) { input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); } input = LLVMBuildTrunc(builder, input, lp_build_vec_type(gallivm, f16i_type), ""); input = lp_build_half_to_float(gallivm, input); } else { assert(start == 0); assert(stop == 32); assert(type.width == 32); } input = LLVMBuildBitCast(builder, input, bld->vec_type, ""); } else { /* FIXME */ assert(0); input = bld->undef; } break; case UTIL_FORMAT_TYPE_FIXED: if (type.floating) { double scale = 1.0 / ((1 << (chan_desc.size/2)) - 1); LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildSIToFP(builder, input, bld->vec_type, ""); input = LLVMBuildFMul(builder, input, scale_val, ""); } else { /* FIXME */ assert(0); input = bld->undef; } break; default: assert(0); input = bld->undef; break; } return input; }